4 from __future__
import unicode_literals
39 import xml
.etree
.ElementTree
43 compat_HTMLParseError
,
49 compat_ctypes_WINFUNCTYPE
,
50 compat_etree_fromstring
,
53 compat_html_entities_html5
,
66 compat_urllib_parse_urlencode
,
67 compat_urllib_parse_urlparse
,
68 compat_urllib_parse_urlunparse
,
69 compat_urllib_parse_quote
,
70 compat_urllib_parse_quote_plus
,
71 compat_urllib_parse_unquote_plus
,
72 compat_urllib_request
,
def register_socks_protocols():
    """Add the SOCKS schemes to urlparse's netloc-aware scheme list.

    Workaround for https://bugs.python.org/issue7904 (Python < 2.6.5):
    urlsplit() mishandles URLs whose scheme is missing from
    urlparse.uses_netloc, so "register" the SOCKS proxy schemes here.
    """
    known_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme not in known_schemes:
            known_schemes.append(socks_scheme)
# Type object of a compiled regular expression, for isinstance() checks
# against values that may be either a pattern string or a compiled pattern.
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
96 def random_user_agent():
97 _USER_AGENT_TPL
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
1676 return _USER_AGENT_TPL
% random
.choice(_CHROME_VERSIONS
)
1680 'User-Agent': random_user_agent(),
1681 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1682 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1683 'Accept-Encoding': 'gzip, deflate',
1684 'Accept-Language': 'en-us,en;q=0.5',
1689 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
# Unique sentinel: lets callers distinguish "no default was supplied" from an
# explicit default of None (used by the xpath_* helpers below).
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1700 'en': ENGLISH_MONTH_NAMES
,
1702 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1703 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1706 KNOWN_EXTENSIONS
= (
1707 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1708 'flv', 'f4v', 'f4a', 'f4b',
1709 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1710 'mkv', 'mka', 'mk3d',
1713 'asf', 'wmv', 'wma',
1719 'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode
# Maps each accented/special Latin character to an ASCII transliteration;
# most map one-to-one, while ligatures and special letters expand to multiple
# characters ('AE', 'OE', 'TH', 'ss', ...) via the list items in the chain.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1748 '%Y/%m/%d %H:%M:%S',
1752 '%Y-%m-%d %H:%M:%S',
1753 '%Y-%m-%d %H:%M:%S.%f',
1754 '%Y-%m-%d %H:%M:%S:%f',
1757 '%Y-%m-%dT%H:%M:%SZ',
1758 '%Y-%m-%dT%H:%M:%S.%fZ',
1759 '%Y-%m-%dT%H:%M:%S.%f0Z',
1760 '%Y-%m-%dT%H:%M:%S',
1761 '%Y-%m-%dT%H:%M:%S.%f',
1763 '%b %d %Y at %H:%M',
1764 '%b %d %Y at %H:%M:%S',
1765 '%B %d %Y at %H:%M',
1766 '%B %d %Y at %H:%M:%S',
1770 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
1771 DATE_FORMATS_DAY_FIRST
.extend([
1777 '%d/%m/%Y %H:%M:%S',
1780 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
1781 DATE_FORMATS_MONTH_FIRST
.extend([
1786 '%m/%d/%Y %H:%M:%S',
# Matches the tail of P.A.C.K.E.R.-style packed JavaScript:
# ...}('payload', radix, count, 'word1|word2|...'.split('|')
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element (quote style around
# the type value may vary); the JSON payload is captured as group 'json_ld'.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1793 def preferredencoding():
1794 """Get preferred encoding.
1796 Returns the best encoding scheme for the system, based on
1797 locale.getpreferredencoding() and some further tweaks.
1800 pref = locale.getpreferredencoding()
1808 def write_json_file(obj, fn):
1809 """ Encode obj as JSON and write it to fn, atomically if possible """
1811 fn = encodeFilename(fn)
1812 if sys.version_info < (3, 0) and sys.platform != 'win32
':
1813 encoding = get_filesystem_encoding()
1814 # os.path.basename returns a bytes object, but NamedTemporaryFile
1815 # will fail if the filename contains non ascii characters unless we
1816 # use a unicode object
1817 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1818 # the same for os.path.dirname
1819 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1821 path_basename = os.path.basename
1822 path_dirname = os.path.dirname
1826 'prefix
': path_basename(fn) + '.',
1827 'dir': path_dirname(fn),
1831 # In Python 2.x, json.dump expects a bytestream.
1832 # In Python 3.x, it writes to a character stream
1833 if sys.version_info < (3, 0):
1838 'encoding
': 'utf
-8',
1841 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1846 if sys.platform == 'win32
':
1847 # Need to remove existing file on Windows, else os.rename raises
1848 # WindowsError or FileExistsError.
1856 os.chmod(tf.name, 0o666 & ~mask)
1859 os.rename(tf.name, fn)
1868 if sys.version_info >= (2, 7):
1869 def find_xpath_attr(node, xpath, key, val=None):
1870 """ Find the xpath xpath[@key=val] """
1871 assert re.match(r'^
[a
-zA
-Z_
-]+$
', key)
1872 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1873 return node.find(expr)
1875 def find_xpath_attr(node, xpath, key, val=None):
1876 for f in node.findall(compat_xpath(xpath)):
1877 if key not in f.attrib:
1879 if val is None or f.attrib.get(key) == val:
1883 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1884 # the namespace parameter
1887 def xpath_with_ns(path
, ns_map
):
1888 components
= [c
.split(':') for c
in path
.split('/')]
1890 for c
in components
:
1892 replaced
.append(c
[0])
1895 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
1896 return '/'.join(replaced
)
1899 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1900 def _find_xpath(xpath
):
1901 return node
.find(compat_xpath(xpath
))
1903 if isinstance(xpath
, (str, compat_str
)):
1904 n
= _find_xpath(xpath
)
1912 if default
is not NO_DEFAULT
:
1915 name
= xpath
if name
is None else name
1916 raise ExtractorError('Could not find XML element %s' % name
)
1922 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1923 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
1924 if n
is None or n
== default
:
1927 if default
is not NO_DEFAULT
:
1930 name
= xpath
if name
is None else name
1931 raise ExtractorError('Could not find XML element\'s text %s' % name
)
1937 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1938 n
= find_xpath_attr(node
, xpath
, key
)
1940 if default
is not NO_DEFAULT
:
1943 name
= '%s[@%s]' % (xpath
, key
) if name
is None else name
1944 raise ExtractorError('Could not find XML attribute %s' % name
)
1947 return n
.attrib
[key
]
def get_element_by_id(id, html):
    """Extract the inner content of the tag whose id attribute equals *id*.

    Thin wrapper around get_element_by_attribute() with the attribute
    fixed to 'id'.
    """
    return get_element_by_attribute('id', id, html)
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying *class_name*, or None."""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose *attribute* matches *value*.

    Returns None when no matching tag is found. *value* is treated as a
    literal unless escape_value is False (then it is a regex fragment).
    """
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
def get_elements_by_class(class_name, html):
    """List the contents of every tag whose class attribute contains *class_name*."""
    # Match the class name as a whole word anywhere inside the attribute value.
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1973 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1974 """Return the content of the tag with the specified attribute in the passed HTML document"""
1976 value = re.escape(value) if escape_value else value
1979 for m in re.finditer(r'''(?xs)
1981 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
1983 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*?
1987 ''' % (re.escape(attribute), value), html):
1988 res = m.group('content
')
1990 if res.startswith('"') or res.startswith("'"):
1993 retlist.append(unescapeHTML(res))
1998 class HTMLAttributeParser(compat_HTMLParser):
1999 """Trivial HTML parser to gather the attributes for a single element"""
2003 compat_HTMLParser.__init__(self)
2005 def handle_starttag(self, tag, attrs):
2006 self.attrs = dict(attrs)
2009 def extract_attributes(html_element):
2010 """Given a string for an HTML element such as
a="foo" B="bar" c="&#98;az" d=boz
empty= noval entity="&amp;"
2016 Decode and return a dictionary of attributes.
2018 'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
',
2019 'empty
': '', 'noval
': None, 'entity
': '&',
2020 'sq
': '"', 'dq': '\''
2022 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2023 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2025 parser = HTMLAttributeParser()
2027 parser.feed(html_element)
2029 # Older Python may throw HTMLParseError in case of malformed HTML
2030 except compat_HTMLParseError:
2035 def clean_html(html):
2036 """Clean an HTML snippet into a readable string"""
2038 if html is None: # Convenience for sanitizing descriptions etc.
2042 html = html.replace('\n', ' ')
2043 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2044 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2046 html = re.sub('<.*?>', '', html)
2047 # Replace html entities
2048 html = unescapeHTML(html)
2052 def sanitize_open(filename, open_mode):
2053 """Try to open the given filename, and slightly tweak it if this fails.
2055 Attempts to open the given filename. If this fails, it tries to change
2056 the filename slightly, step by step, until it's either able to open it
2057 or it fails and raises a final exception, like the standard open()
2060 It returns the tuple (stream, definitive_file_name).
2064 if sys.platform == 'win32':
2066 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2067 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2068 stream = open(encodeFilename(filename), open_mode)
2069 return (stream, filename)
2070 except (IOError, OSError) as err:
2071 if err.errno in (errno.EACCES,):
2074 # In case of error, try to remove win32 forbidden chars
2075 alt_filename = sanitize_path(filename)
2076 if alt_filename == filename:
2079 # An exception here should be caught in the caller
2080 stream = open(encodeFilename(alt_filename), open_mode)
2081 return (stream, alt_filename)
2084 def timeconvert(timestr):
2085 """Convert RFC 2822 defined time string into system timestamp"""
2087 timetuple = email.utils.parsedate_tz(timestr)
2088 if timetuple is not None:
2089 timestamp = email.utils.mktime_tz(timetuple)
2093 def sanitize_filename(s, restricted=False, is_id=False):
2094 """Sanitizes a string so it could be used as part of a filename.
2095 If restricted is set, use a stricter subset of allowed characters.
2096 Set is_id if this is not an arbitrary string, but an ID that should be kept
2099 def replace_insane(char):
2100 if restricted and char in ACCENT_CHARS:
2101 return ACCENT_CHARS[char]
2102 if char == '?' or ord(char) < 32 or ord(char) == 127:
2105 return '' if restricted else '\''
2107 return '_
-' if restricted else ' -'
2108 elif char in '\\/|
*<>':
2110 if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()):
2112 if restricted
and ord(char
) > 127:
2119 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
2120 result
= ''.join(map(replace_insane
, s
))
2122 while '__' in result
:
2123 result
= result
.replace('__', '_')
2124 result
= result
.strip('_')
2125 # Common case of "Foreign band name - English song title"
2126 if restricted
and result
.startswith('-_'):
2128 if result
.startswith('-'):
2129 result
= '_' + result
[len('-'):]
2130 result
= result
.lstrip('.')
2136 def sanitize_path(s
, force
=False):
2137 """Sanitizes and normalizes path on Windows"""
2138 if sys
.platform
== 'win32':
2140 drive_or_unc
, _
= os
.path
.splitdrive(s
)
2141 if sys
.version_info
< (2, 7) and not drive_or_unc
:
2142 drive_or_unc
, _
= os
.path
.splitunc(s
)
2148 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
2152 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
2153 for path_part
in norm_path
]
2155 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
2156 elif force
and s
[0] == os
.path
.sep
:
2157 sanitized_path
.insert(0, os
.path
.sep
)
2158 return os
.path
.join(*sanitized_path
)
2161 def sanitize_url(url
):
2162 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2163 # the number of unwanted failures due to missing protocol
2164 if url
.startswith('//'):
2165 return 'http:%s' % url
2166 # Fix some common typos seen so far
2168 # https://github.com/ytdl-org/youtube-dl/issues/15649
2169 (r
'^httpss://', r
'https://'),
2170 # https://bx1.be/lives/direct-tv/
2171 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
2173 for mistake
, fixup
in COMMON_TYPOS
:
2174 if re
.match(mistake
, url
):
2175 return re
.sub(mistake
, fixup
, url
)
2179 def extract_basic_auth(url
):
2180 parts
= compat_urlparse
.urlsplit(url
)
2181 if parts
.username
is None:
2183 url
= compat_urlparse
.urlunsplit(parts
._replace
(netloc
=(
2184 parts
.hostname
if parts
.port
is None
2185 else '%s:%d' % (parts
.hostname
, parts
.port
))))
2186 auth_payload
= base64
.b64encode(
2187 ('%s:%s' % (parts
.username
, parts
.password
or '')).encode('utf-8'))
2188 return url
, 'Basic ' + auth_payload
.decode('utf-8')
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request for *url* after cleaning it.

    The URL is sanitized and escaped, and any userinfo (user:pass@host)
    embedded in it is moved into a Basic Authorization header.
    """
    cleaned_url = escape_url(sanitize_url(url))
    url, auth_header = extract_basic_auth(cleaned_url)
    if auth_header is not None:
        # Write into whichever headers dict the caller supplied —
        # positionally or by keyword — creating one if necessary.
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2200 """Expand shell variables and ~"""
2201 return os
.path
.expandvars(compat_expanduser(s
))
2204 def orderedSet(iterable
):
2205 """ Remove all duplicates from the input iterable """
2213 def _htmlentity_transform(entity_with_semicolon
):
2214 """Transforms an HTML entity to a character."""
2215 entity
= entity_with_semicolon
[:-1]
2217 # Known non-numeric HTML entity
2218 if entity
in compat_html_entities
.name2codepoint
:
2219 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
2221 # TODO: HTML5 allows entities without a semicolon. For example,
# '&amp;Eacute;ric' should be decoded as 'Éric'.
2223 if entity_with_semicolon
in compat_html_entities_html5
:
2224 return compat_html_entities_html5
[entity_with_semicolon
]
2226 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
2227 if mobj
is not None:
2228 numstr
= mobj
.group(1)
2229 if numstr
.startswith('x'):
2231 numstr
= '0%s' % numstr
2234 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2236 return compat_chr(int(numstr
, base
))
2240 # Unknown entity in name, return its literal representation
2241 return '&%s;' % entity
2244 def unescapeHTML(s
):
2247 assert type(s
) == compat_str
2250 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
def escapeHTML(text):
    """Escape the HTML special characters in *text*.

    Replaces the five characters with markup meaning (&, <, >, " and ')
    with their character references. The source's replacement targets had
    been garbled into no-op identity replacements (e.g. '&' -> '&');
    restored to the standard entity forms. '&' is replaced first so already
    produced entities are not double-escaped.
    """
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )
2264 def process_communicate_or_kill(p
, *args
, **kwargs
):
2266 return p
.communicate(*args
, **kwargs
)
2267 except BaseException
: # Including KeyboardInterrupt
2273 def get_subprocess_encoding():
2274 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2275 # For subprocess calls, encode with locale encoding
2276 # Refer to http://stackoverflow.com/a/9951851/35070
2277 encoding
= preferredencoding()
2279 encoding
= sys
.getfilesystemencoding()
2280 if encoding
is None:
2285 def encodeFilename(s
, for_subprocess
=False):
2287 @param s The name of the file
2290 assert type(s
) == compat_str
2292 # Python 3 has a Unicode API
2293 if sys
.version_info
>= (3, 0):
2296 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2297 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2298 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2299 if not for_subprocess
and sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2302 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2303 if sys
.platform
.startswith('java'):
2306 return s
.encode(get_subprocess_encoding(), 'ignore')
2309 def decodeFilename(b
, for_subprocess
=False):
2311 if sys
.version_info
>= (3, 0):
2314 if not isinstance(b
, bytes):
2317 return b
.decode(get_subprocess_encoding(), 'ignore')
2320 def encodeArgument(s
):
2321 if not isinstance(s
, compat_str
):
2322 # Legacy code that uses byte strings
2323 # Uncomment the following line after fixing all post processors
2324 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
2325 s
= s
.decode('ascii')
2326 return encodeFilename(s
, True)
2329 def decodeArgument(b
):
2330 return decodeFilename(b
, True)
2333 def decodeOption(optval
):
2336 if isinstance(optval
, bytes):
2337 optval
= optval
.decode(preferredencoding())
2339 assert isinstance(optval
, compat_str
)
2343 def formatSeconds(secs
, delim
=':', msec
=False):
2345 ret
= '%d%s%02d%s%02d' % (secs
// 3600, delim
, (secs
% 3600) // 60, delim
, secs
% 60)
2347 ret
= '%d%s%02d' % (secs
// 60, delim
, secs
% 60)
2350 return '%s.%03d' % (ret
, secs
% 1) if msec
else ret
2353 def make_HTTPS_handler(params
, **kwargs
):
2354 opts_no_check_certificate
= params
.get('nocheckcertificate', False)
2355 if hasattr(ssl
, 'create_default_context'): # Python >= 3.4 or 2.7.9
2356 context
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
)
2357 if opts_no_check_certificate
:
2358 context
.check_hostname
= False
2359 context
.verify_mode
= ssl
.CERT_NONE
2361 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2364 # (create_default_context present but HTTPSHandler has no context=)
2367 if sys
.version_info
< (3, 2):
2368 return YoutubeDLHTTPSHandler(params
, **kwargs
)
2369 else: # Python < 3.4
2370 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
)
2371 context
.verify_mode
= (ssl
.CERT_NONE
2372 if opts_no_check_certificate
2373 else ssl
.CERT_REQUIRED
)
2374 context
.set_default_verify_paths()
2375 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2378 def bug_reports_message(before
=';'):
2379 if ytdl_is_updateable():
2380 update_cmd
= 'type yt-dlp -U to update'
2382 update_cmd
= 'see https://github.com/yt-dlp/yt-dlp on how to update'
2383 msg
= 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
2384 msg
+= ' Make sure you are using the latest version; %s.' % update_cmd
2385 msg
+= ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
2387 before
= before
.rstrip()
2388 if not before
or before
.endswith(('.', '!', '?')):
2389 msg
= msg
[0].title() + msg
[1:]
2391 return (before
+ ' ' if before
else '') + msg
2394 class YoutubeDLError(Exception):
2395 """Base exception for YoutubeDL errors."""
# Exception types that signal a network problem (rather than a programming
# error); collected as a list, then frozen into a tuple so it can be used
# directly in except clauses and `in` checks.
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    # ssl.CertificateError is not present on every supported Python version,
    # hence the hasattr() guard.
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2405 class ExtractorError(YoutubeDLError
):
2406 """Error during info extraction."""
2408 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None, ie
=None):
2409 """ tb, if given, is the original traceback (so that it can be printed out).
2410 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
2412 if sys
.exc_info()[0] in network_exceptions
:
2417 self
.expected
= expected
2419 self
.video_id
= video_id
2421 self
.exc_info
= sys
.exc_info() # preserve original exception
2423 super(ExtractorError
, self
).__init
__(''.join((
2424 format_field(ie
, template
='[%s] '),
2425 format_field(video_id
, template
='%s: '),
2427 format_field(cause
, template
=' (caused by %r)'),
2428 '' if expected
else bug_reports_message())))
2430 def format_traceback(self
):
2431 if self
.traceback
is None:
2433 return ''.join(traceback
.format_tb(self
.traceback
))
2436 class UnsupportedError(ExtractorError
):
2437 def __init__(self
, url
):
2438 super(UnsupportedError
, self
).__init
__(
2439 'Unsupported URL: %s' % url
, expected
=True)
2443 class RegexNotFoundError(ExtractorError
):
2444 """Error when a regex didn't match"""
2448 class GeoRestrictedError(ExtractorError
):
2449 """Geographic restriction Error exception.
2451 This exception may be thrown when a video is not available from your
2452 geographic location due to geographic restrictions imposed by a website.
2455 def __init__(self
, msg
, countries
=None):
2456 super(GeoRestrictedError
, self
).__init
__(msg
, expected
=True)
2458 self
.countries
= countries
2461 class DownloadError(YoutubeDLError
):
2462 """Download Error exception.
2464 This exception may be thrown by FileDownloader objects if they are not
2465 configured to continue on errors. They will contain the appropriate
2469 def __init__(self
, msg
, exc_info
=None):
2470 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2471 super(DownloadError
, self
).__init
__(msg
)
2472 self
.exc_info
= exc_info
2475 class EntryNotInPlaylist(YoutubeDLError
):
2476 """Entry not in playlist exception.
2478 This exception will be thrown by YoutubeDL when a requested entry
2479 is not found in the playlist info_dict
2484 class SameFileError(YoutubeDLError
):
2485 """Same File exception.
2487 This exception will be thrown by FileDownloader objects if they detect
2488 multiple files would have to be downloaded to the same file on disk.
2493 class PostProcessingError(YoutubeDLError
):
2494 """Post Processing exception.
2496 This exception may be raised by PostProcessor's .run() method to
2497 indicate an error in the postprocessing task.
2500 def __init__(self
, msg
):
2501 super(PostProcessingError
, self
).__init
__(msg
)
class ExistingVideoReached(YoutubeDLError):
    """ Raised when an already existing (previously downloaded) video is reached.
    NOTE(review): the original docstring ("--max-downloads limit has been
    reached") was a copy-paste from MaxDownloadsReached and did not match
    this class's name. """
class RejectedVideoReached(YoutubeDLError):
    """ Raised when a video rejected by the configured filters is reached.
    NOTE(review): the original docstring ("--max-downloads limit has been
    reached") was a copy-paste from MaxDownloadsReached and did not match
    this class's name. """
2515 class ThrottledDownload(YoutubeDLError
):
2516 """ Download speed below --throttled-rate. """
2520 class MaxDownloadsReached(YoutubeDLError
):
2521 """ --max-downloads limit has been reached. """
2525 class UnavailableVideoError(YoutubeDLError
):
2526 """Unavailable Format exception.
2528 This exception will be thrown when a video is requested
2529 in a format that is not available for that video.
2534 class ContentTooShortError(YoutubeDLError
):
2535 """Content Too Short exception.
2537 This exception may be raised by FileDownloader objects when a file they
2538 download is too small for what the server announced first, indicating
2539 the connection was probably interrupted.
2542 def __init__(self
, downloaded
, expected
):
2543 super(ContentTooShortError
, self
).__init
__(
2544 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
)
2547 self
.downloaded
= downloaded
2548 self
.expected
= expected
2551 class XAttrMetadataError(YoutubeDLError
):
2552 def __init__(self
, code
=None, msg
='Unknown error'):
2553 super(XAttrMetadataError
, self
).__init
__(msg
)
2557 # Parsing code and msg
2558 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
2559 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
2560 self
.reason
= 'NO_SPACE'
2561 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
2562 self
.reason
= 'VALUE_TOO_LONG'
2564 self
.reason
= 'NOT_SUPPORTED'
2567 class XAttrUnavailableError(YoutubeDLError
):
2571 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
2572 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2573 # expected HTTP responses to meet HTTP/1.0 or later (see also
2574 # https://github.com/ytdl-org/youtube-dl/issues/6727)
2575 if sys
.version_info
< (3, 0):
2576 kwargs
['strict'] = True
2577 hc
= http_class(*args
, **compat_kwargs(kwargs
))
2578 source_address
= ydl_handler
._params
.get('source_address')
2580 if source_address
is not None:
2581 # This is to workaround _create_connection() from socket where it will try all
2582 # address data from getaddrinfo() including IPv6. This filters the result from
2583 # getaddrinfo() based on the source_address value.
2584 # This is based on the cpython socket.create_connection() function.
2585 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2586 def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
2587 host
, port
= address
2589 addrs
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
)
2590 af
= socket
.AF_INET
if '.' in source_address
[0] else socket
.AF_INET6
2591 ip_addrs
= [addr
for addr
in addrs
if addr
[0] == af
]
2592 if addrs
and not ip_addrs
:
2593 ip_version
= 'v4' if af
== socket
.AF_INET
else 'v6'
2595 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2596 % (ip_version
, source_address
[0]))
2597 for res
in ip_addrs
:
2598 af
, socktype
, proto
, canonname
, sa
= res
2601 sock
= socket
.socket(af
, socktype
, proto
)
2602 if timeout
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
:
2603 sock
.settimeout(timeout
)
2604 sock
.bind(source_address
)
2606 err
= None # Explicitly break reference cycle
2608 except socket
.error
as _
:
2610 if sock
is not None:
2615 raise socket
.error('getaddrinfo returns an empty list')
2616 if hasattr(hc
, '_create_connection'):
2617 hc
._create
_connection
= _create_connection
2618 sa
= (source_address
, 0)
2619 if hasattr(hc
, 'source_address'): # Python 2.7+
2620 hc
.source_address
= sa
2622 def _hc_connect(self
, *args
, **kwargs
):
2623 sock
= _create_connection(
2624 (self
.host
, self
.port
), self
.timeout
, sa
)
2626 self
.sock
= ssl
.wrap_socket(
2627 sock
, self
.key_file
, self
.cert_file
,
2628 ssl_version
=ssl
.PROTOCOL_TLSv1
)
2631 hc
.connect
= functools
.partial(_hc_connect
, hc
)
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker from *headers*.

    When the marker is present, a new dict is returned with the marker and
    any Accept-Encoding header (case-insensitive) removed; otherwise the
    original mapping is returned unchanged (same object).
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    cleaned = dict((name, value) for name, value in headers.items()
                   if name.lower() != 'accept-encoding')
    del cleaned['Youtubedl-no-compression']
    return cleaned
2646 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
2647 """Handler for HTTP requests and responses.
2649 This class, when installed with an OpenerDirector, automatically adds
2650 the standard headers to every HTTP request and handles gzipped and
2651 deflated responses from web servers. If compression is to be avoided in
2652 a particular request, the original request in the program code only has
2653 to include the HTTP header "Youtubedl-no-compression", which will be
2654 removed before making the real request.
2656 Part of this code was copied from:
2658 http://techknack.net/python-urllib2-handlers/
2660 Andrew Rowls, the author of that code, agreed to release it to the
    def __init__(self, params, *args, **kwargs):
        # Delegate standard handler setup to the stdlib HTTPHandler.
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        # params: the YoutubeDL options dict; stored so the connection
        # factory (_create_http_connection) can read e.g. 'source_address'.
        self._params = params
2668 def http_open(self
, req
):
2669 conn_class
= compat_http_client
.HTTPConnection
2671 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2673 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2674 del req
.headers
['Ytdl-socks-proxy']
2676 return self
.do_open(functools
.partial(
2677 _create_http_connection
, self
, conn_class
, False),
2685 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
2687 return zlib
.decompress(data
)
2689 def http_request(self
, req
):
2690 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2691 # always respected by websites, some tend to give out URLs with non percent-encoded
2692 # non-ASCII characters (see telemb.py, ard.py [#3412])
2693 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2694 # To work around aforementioned issue we will replace request's original URL with
2695 # percent-encoded one
2696 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2697 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2698 url
= req
.get_full_url()
2699 url_escaped
= escape_url(url
)
2701 # Substitute URL if any change after escaping
2702 if url
!= url_escaped
:
2703 req
= update_Request(req
, url
=url_escaped
)
2705 for h
, v
in std_headers
.items():
2706 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2707 # The dict keys are capitalized because of this bug by urllib
2708 if h
.capitalize() not in req
.headers
:
2709 req
.add_header(h
, v
)
2711 req
.headers
= handle_youtubedl_headers(req
.headers
)
2713 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
2714 # Python 2.6 is brain-dead when it comes to fragments
2715 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
2716 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
2720 def http_response(self
, req
, resp
):
2723 if resp
.headers
.get('Content-encoding', '') == 'gzip':
2724 content
= resp
.read()
2725 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
2727 uncompressed
= io
.BytesIO(gz
.read())
2728 except IOError as original_ioerror
:
# There may be junk at the end of the file
2730 # See http://stackoverflow.com/q/4928560/35070 for details
2731 for i
in range(1, 1024):
2733 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
2734 uncompressed
= io
.BytesIO(gz
.read())
2739 raise original_ioerror
2740 resp
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2741 resp
.msg
= old_resp
.msg
2742 del resp
.headers
['Content-encoding']
2744 if resp
.headers
.get('Content-encoding', '') == 'deflate':
2745 gz
= io
.BytesIO(self
.deflate(resp
.read()))
2746 resp
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2747 resp
.msg
= old_resp
.msg
2748 del resp
.headers
['Content-encoding']
2749 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2750 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2751 if 300 <= resp
.code
< 400:
2752 location
= resp
.headers
.get('Location')
2754 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2755 if sys
.version_info
>= (3, 0):
2756 location
= location
.encode('iso-8859-1').decode('utf-8')
2758 location
= location
.decode('utf-8')
2759 location_escaped
= escape_url(location
)
2760 if location
!= location_escaped
:
2761 del resp
.headers
['Location']
2762 if sys
.version_info
< (3, 0):
2763 location_escaped
= location_escaped
.encode('utf-8')
2764 resp
.headers
['Location'] = location_escaped
2767 https_request
= http_request
2768 https_response
= http_response
def make_socks_conn_class(base_class, socks_proxy):
    """Build a subclass of *base_class* whose connect() tunnels through the
    SOCKS proxy given as a URL (socks/socks4/socks4a/socks5 scheme)."""
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        # Credentials in the proxy URL are percent-encoded
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            # Establish the raw SOCKS connection first ...
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # ... then wrap it in TLS when the base class is HTTPS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that uses a configurable connection class and honours
    the internal 'Ytdl-socks-proxy' request header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        # Forward TLS context / hostname checking when the base handler has them
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        # Route through SOCKS when the request carries the internal header
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the curl-style HttpOnly marker so the stock parser accepts it
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies cookie handling to HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Apply the same processing to HTTPS as to HTTP
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Treat every redirect status (incl. 308) like the stock 302 handler
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (timezone_as_timedelta, date_str_without_tz); the delta is zero
    when no timezone (or a bare 'Z') is present.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    if not m:
        return datetime.timedelta(), date_str

    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # Bare 'Z' designator: UTC
        return datetime.timedelta(), date_str

    sign = 1 if m.group('sign') == '+' else -1
    timezone = datetime.timedelta(
        hours=sign * int(m.group('hours')),
        minutes=sign * int(m.group('minutes')))
    return timezone, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Fractional seconds are not representable by strptime here; drop them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        pass
def date_formats(day_first=True):
    """Return the strptime format list matching day-first or month-first order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to the RFC 2822 parser from the email package
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date/time string to a UNIX timestamp (or None)."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    # Fall back to the RFC 2822 parser
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
def determine_ext(url, default_ext='unknown_video'):
    """Guess a media file extension from *url*; *default_ext* when undecidable."""
    if url is None or '.' not in url:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Derive a subtitle file name by swapping the media extension for
    '<lang>.<format>'."""
    new_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, new_ext, expected_real_ext)
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Recursively resolve the base date, then apply the signed offset
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Calendar-aware arithmetic; round months/years at day granularity
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months."""
    # Work with a 0-based month index so // and % handle negative offsets
    month_index = dt.month + months - 1
    year = dt.year + month_index // 12
    month = month_index % 12 + 1
    # Clamp the day to the target month's length (e.g. Jan 31 + 1 month)
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }

    def _round_to(value, step):
        # Round half up to the nearest multiple of step
        return ((value + step / 2) // step) * step

    timestamp = calendar.timegm(dt.timetuple())
    return datetime.datetime.utcfromtimestamp(_round_to(timestamp, unit_seconds[precision]))
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        # Not an 8-digit date: return unchanged
        return date_str
    return '-'.join(match.groups())
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        # Some environments may hand back bytes; decode with the locale encoding
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C-level file descriptors to the Win32 standard handle constants
    WIN_OUTPUT_IDS = {
        1: -11,  # STD_OUTPUT_HANDLE
        2: -12,  # STD_ERROR_HANDLE
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle only counts as a console when it is a character device
        # that answers GetConsoleMode
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)
        # Non-BMP characters occupy two UTF-16 code units
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3378 def write_string(s
, out
=None, encoding
=None):
3381 assert type(s
) == compat_str
3383 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
3384 if _windows_write_string(s
, out
):
3387 if ('b' in getattr(out
, 'mode', '')
3388 or sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
3389 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
3391 elif hasattr(out
, 'buffer'):
3392 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
3393 byt
= s
.encode(enc
, 'ignore')
3394 out
.buffer.write(byt
)
def bytes_to_intlist(bs):
    """Return the byte string *bs* as a list of integer byte values."""
    if not bs:
        return []
    # Python 3 byte strings already iterate as ints; Python 2 yields chars
    return list(bs) if isinstance(bs[0], int) else [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Pack a list of integer byte values back into a byte string."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the whole file: low/high halves of the byte range
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED pointer alive on the file object for unlock
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
class locked_file(object):
    """File wrapper that takes an advisory lock for the lifetime of a
    `with` block (shared for 'r', exclusive for 'a'/'w')."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Don't leak the handle when locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
def get_filesystem_encoding():
    """Name of the filesystem encoding, defaulting to UTF-8 when unknown."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
def shell_quote(args):
    """Join *args* into a single shell-safe command-line string."""
    enc = get_filesystem_encoding()
    # We may get a filename encoded with 'encodeFilename'
    return ' '.join(
        compat_shlex_quote(a.decode(enc) if isinstance(a, bytes) else a)
        for a in args)
3537 def smuggle_url(url
, data
):
3538 """ Pass additional data in a URL for internal use. """
3540 url
, idata
= unsmuggle_url(url
, {})
3542 sdata
= compat_urllib_parse_urlencode(
3543 {'__youtubedl_smuggle': json.dumps(data)}
)
3544 return url
+ '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): returns (clean_url, data_or_default)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Human-readable binary size, e.g. 1024 -> '1.00KiB'; 'N/A' for None."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' from the start of *s* using the multiplier map
    *unit_table*; returns an int, or None when nothing matches."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    # Accept ',' as a decimal separator too
    num_str = m.group('num').replace(',', '.')
    mult = unit_table[m.group('unit')]
    return int(float(num_str) * mult)
3581 def parse_filesize(s
):
3585 # The lower-case forms are of course incorrect and unofficial,
3586 # but we support those too
3603 'megabytes': 1000 ** 2,
3604 'mebibytes': 1024 ** 2,
3610 'gigabytes': 1000 ** 3,
3611 'gibibytes': 1024 ** 3,
3617 'terabytes': 1000 ** 4,
3618 'tebibytes': 1024 ** 4,
3624 'petabytes': 1000 ** 5,
3625 'pebibytes': 1024 ** 5,
3631 'exabytes': 1000 ** 6,
3632 'exbibytes': 1024 ** 6,
3638 'zettabytes': 1000 ** 7,
3639 'zebibytes': 1024 ** 7,
3645 'yottabytes': 1000 ** 8,
3646 'yobibytes': 1024 ** 8,
3649 return lookup_unit_table(_UNIT_TABLE
, s
)
3658 if re
.match(r
'^[\d,.]+$', s
):
3659 return str_to_int(s
)
3670 return lookup_unit_table(_UNIT_TABLE
, s
)
def parse_resolution(s):
    """Extract {'width', 'height'} from strings like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    # WIDTHxHEIGHT
    m = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if m:
        return {
            'width': int(m.group('w')),
            'height': int(m.group('h')),
        }

    # e.g. 1080p / 1080i
    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    # 4k / 8k marketing labels
    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        return {'height': int(m.group(1)) * 540}

    return {}
3695 def parse_bitrate(s
):
3696 if not isinstance(s
, compat_str
):
3698 mobj
= re
.search(r
'\b(\d+)\s*kbps', s
)
3700 return int(mobj
.group(1))
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    # Fall back to English when the requested language is unknown
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        return month_names.index(name) + 1
    except ValueError:
        return None
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """

    abbreviations = [s[:3] for s in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
def fix_xml_ampersands(xml_str):
    """Escape raw '&' characters in XML, leaving existing entities intact."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
def setproctitle(title):
    """Best effort: rename the process via glibc prctl(PR_SET_NAME)."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3758 def remove_start(s
, start
):
3759 return s
[len(start
):] if s
is not None and s
.startswith(start
) else s
3762 def remove_end(s
, end
):
3763 return s
[:-len(end
)] if s
is not None and s
.endswith(end
) else s
3766 def remove_quotes(s
):
3767 if s
is None or len(s
) < 2:
3769 for quote
in ('"', "'", ):
3770 if s
[0] == quote
and s
[-1] == quote
:
3775 def get_domain(url
):
3776 domain
= re
.match(r
'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url
)
3777 return domain
.group('domain') if domain
else None
def url_basename(url):
    """Last path component of *url* (empty string when the path is empty)."""
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').split('/')[-1]
3786 return re
.match(r
'https?://[^?#&]+/', url
).group()
3789 def urljoin(base
, path
):
3790 if isinstance(path
, bytes):
3791 path
= path
.decode('utf-8')
3792 if not isinstance(path
, compat_str
) or not path
:
3794 if re
.match(r
'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path
):
3796 if isinstance(base
, bytes):
3797 base
= base
.decode('utf-8')
3798 if not isinstance(base
, compat_str
) or not re
.match(
3799 r
'^(?:https?:)?//', base
):
3801 return compat_urlparse
.urljoin(base
, path
)
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that always issues HEAD."""

    def get_method(self):
        return 'HEAD'
class PUTRequest(compat_urllib_request.Request):
    """Request subclass that always issues PUT."""

    def get_method(self):
        return 'PUT'
3814 def int_or_none(v
, scale
=1, default
=None, get_attr
=None, invscale
=1):
3817 v
= getattr(v
, get_attr
, None)
3823 return int(v
) * invscale
// scale
3824 except (ValueError, TypeError):
def str_or_none(v, default=None):
    """Convert *v* to a string, returning *default* when it is None."""
    if v is None:
        return default
    return compat_str(v)
3832 def str_to_int(int_str
):
3833 """ A more relaxed version of int_or_none """
3834 if isinstance(int_str
, compat_integer_types
):
3836 elif isinstance(int_str
, compat_str
):
3837 int_str
= re
.sub(r
'[,\.\+]', '', int_str
)
3838 return int_or_none(int_str
)
3841 def float_or_none(v
, scale
=1, invscale
=1, default
=None):
3845 return float(v
) * invscale
/ scale
3846 except (ValueError, TypeError):
def bool_or_none(v, default=None):
    """Return *v* only when it is an actual bool; anything else yields *default*."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return *v* with surrounding whitespace removed; non-strings yield *default*."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3858 def url_or_none(url
):
3859 if not url
or not isinstance(url
, compat_str
):
3862 return url
if re
.match(r
'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url
) else None
3865 def strftime_or_none(timestamp
, date_format
, default
=None):
3866 datetime_object
= None
3868 if isinstance(timestamp
, compat_numeric_types
): # unix timestamp
3869 datetime_object
= datetime
.datetime
.utcfromtimestamp(timestamp
)
3870 elif isinstance(timestamp
, compat_str
): # assume YYYYMMDD
3871 datetime_object
= datetime
.datetime
.strptime(timestamp
, '%Y%m%d')
3872 return datetime_object
.strftime(date_format
)
3873 except (ValueError, TypeError, AttributeError):
3877 def parse_duration(s
):
3878 if not isinstance(s
, compat_basestring
):
3883 days
, hours
, mins
, secs
, ms
= [None] * 5
3884 m
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
)
3886 days
, hours
, mins
, secs
, ms
= m
.groups()
3891 [0-9]+\s*y(?:ears?)?\s*
3894 [0-9]+\s*m(?:onths?)?\s*
3897 [0-9]+\s*w(?:eeks?)?\s*
3900 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3904 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3907 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3910 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3913 days
, hours
, mins
, secs
, ms
= m
.groups()
3915 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
3917 hours
, mins
= m
.groups()
3923 duration
+= float(secs
)
3925 duration
+= float(mins
) * 60
3927 duration
+= float(hours
) * 60 * 60
3929 duration
+= float(days
) * 24 * 60 * 60
3931 duration
+= float(ms
)
3935 def prepend_extension(filename
, ext
, expected_real_ext
=None):
3936 name
, real_ext
= os
.path
.splitext(filename
)
3938 '{0}.{1}{2}'.format(name
, ext
, real_ext
)
3939 if not expected_real_ext
or real_ext
[1:] == expected_real_ext
3940 else '{0}.{1}'.format(filename
, ext
))
3943 def replace_extension(filename
, ext
, expected_real_ext
=None):
3944 name
, real_ext
= os
.path
.splitext(filename
)
3945 return '{0}.{1}'.format(
3946 name
if not expected_real_ext
or real_ext
[1:] == expected_real_ext
else filename
,
3950 def check_executable(exe
, args
=[]):
3951 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3952 args can be a list of arguments for a short output (like -version) """
3954 process_communicate_or_kill(subprocess
.Popen(
3955 [exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
))
3961 def get_exe_version(exe
, args
=['--version'],
3962 version_re
=None, unrecognized
='present'):
3963 """ Returns the version of the specified executable,
3964 or False if the executable is not present """
3966 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3967 # SIGTTOU if yt-dlp is run in the background.
3968 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3969 out
, _
= process_communicate_or_kill(subprocess
.Popen(
3970 [encodeArgument(exe
)] + args
,
3971 stdin
=subprocess
.PIPE
,
3972 stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
))
3975 if isinstance(out
, bytes): # Python 2.x
3976 out
= out
.decode('ascii', 'ignore')
3977 return detect_exe_version(out
, version_re
, unrecognized
)
3980 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
3981 assert isinstance(output
, compat_str
)
3982 if version_re
is None:
3983 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
3984 m
= re
.search(version_re
, output
)
3991 class LazyList(collections
.abc
.Sequence
):
3992 ''' Lazy immutable list from an iterable
3993 Note that slices of a LazyList are lists and not LazyList'''
3995 class IndexError(IndexError):
3998 def __init__(self
, iterable
):
3999 self
.__iterable
= iter(iterable
)
4001 self
.__reversed
= False
4005 # We need to consume the entire iterable to iterate in reverse
4006 yield from self
.exhaust()
4008 yield from self
.__cache
4009 for item
in self
.__iterable
:
4010 self
.__cache
.append(item
)
4013 def __exhaust(self
):
4014 self
.__cache
.extend(self
.__iterable
)
4018 ''' Evaluate the entire iterable '''
4019 return self
.__exhaust
()[::-1 if self
.__reversed
else 1]
4022 def __reverse_index(x
):
4023 return None if x
is None else -(x
+ 1)
4025 def __getitem__(self
, idx
):
4026 if isinstance(idx
, slice):
4028 idx
= slice(self
.__reverse
_index
(idx
.start
), self
.__reverse
_index
(idx
.stop
), -(idx
.step
or 1))
4029 start
, stop
, step
= idx
.start
, idx
.stop
, idx
.step
or 1
4030 elif isinstance(idx
, int):
4032 idx
= self
.__reverse
_index
(idx
)
4033 start
, stop
, step
= idx
, idx
, 0
4035 raise TypeError('indices must be integers or slices')
4036 if ((start
or 0) < 0 or (stop
or 0) < 0
4037 or (start
is None and step
< 0)
4038 or (stop
is None and step
> 0)):
4039 # We need to consume the entire iterable to be able to slice from the end
4040 # Obviously, never use this with infinite iterables
4043 return self
.__cache
[idx
]
4044 except IndexError as e
:
4045 raise self
.IndexError(e
) from e
4046 n
= max(start
or 0, stop
or 0) - len(self
.__cache
) + 1
4048 self
.__cache
.extend(itertools
.islice(self
.__iterable
, n
))
4050 return self
.__cache
[idx
]
4051 except IndexError as e
:
4052 raise self
.IndexError(e
) from e
4056 self
[-1] if self
.__reversed
else self
[0]
4057 except self
.IndexError:
4063 return len(self
.__cache
)
4066 self
.__reversed
= not self
.__reversed
4070 # repr and str should mimic a list. So we exhaust the iterable
4071 return repr(self
.exhaust())
4074 return repr(self
.exhaust())
4079 # This is only useful for tests
4080 return len(self
.getslice())
4082 def __init__(self
, pagefunc
, pagesize
, use_cache
=True):
4083 self
._pagefunc
= pagefunc
4084 self
._pagesize
= pagesize
4085 self
._use
_cache
= use_cache
4088 def getpage(self
, pagenum
):
4089 page_results
= self
._cache
.get(pagenum
) or list(self
._pagefunc
(pagenum
))
4091 self
._cache
[pagenum
] = page_results
4094 def getslice(self
, start
=0, end
=None):
4095 return list(self
._getslice
(start
, end
))
4097 def _getslice(self
, start
, end
):
4098 raise NotImplementedError('This method must be implemented by subclasses')
4100 def __getitem__(self
, idx
):
4101 # NOTE: cache must be enabled if this is used
4102 if not isinstance(idx
, int) or idx
< 0:
4103 raise TypeError('indices must be non-negative integers')
4104 entries
= self
.getslice(idx
, idx
+ 1)
4105 return entries
[0] if entries
else None
4108 class OnDemandPagedList(PagedList
):
4109 def _getslice(self
, start
, end
):
4110 for pagenum
in itertools
.count(start
// self
._pagesize
):
4111 firstid
= pagenum
* self
._pagesize
4112 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
4113 if start
>= nextfirstid
:
4117 start
% self
._pagesize
4118 if firstid
<= start
< nextfirstid
4121 ((end
- 1) % self
._pagesize
) + 1
4122 if (end
is not None and firstid
<= end
<= nextfirstid
)
4125 page_results
= self
.getpage(pagenum
)
4126 if startv
!= 0 or endv
is not None:
4127 page_results
= page_results
[startv
:endv
]
4128 yield from page_results
4130 # A little optimization - if current page is not "full", ie. does
4131 # not contain page_size videos then we can assume that this page
4132 # is the last one - there are no more ids on further pages -
4133 # i.e. no need to query again.
4134 if len(page_results
) + startv
< self
._pagesize
:
4137 # If we got the whole page, but the next page is not interesting,
4138 # break out early as well
4139 if end
== nextfirstid
:
4143 class InAdvancePagedList(PagedList
):
4144 def __init__(self
, pagefunc
, pagecount
, pagesize
):
4145 self
._pagecount
= pagecount
4146 PagedList
.__init
__(self
, pagefunc
, pagesize
, True)
4148 def _getslice(self
, start
, end
):
4149 start_page
= start
// self
._pagesize
4151 self
._pagecount
if end
is None else (end
// self
._pagesize
+ 1))
4152 skip_elems
= start
- start_page
* self
._pagesize
4153 only_more
= None if end
is None else end
- start
4154 for pagenum
in range(start_page
, end_page
):
4155 page_results
= self
.getpage(pagenum
)
4157 page_results
= page_results
[skip_elems
:]
4159 if only_more
is not None:
4160 if len(page_results
) < only_more
:
4161 only_more
-= len(page_results
)
4163 yield from page_results
[:only_more
]
4165 yield from page_results
4168 def uppercase_escape(s
):
4169 unicode_escape
= codecs
.getdecoder('unicode_escape')
4171 r
'\\U[0-9a-fA-F]{8}',
4172 lambda m
: unicode_escape(m
.group(0))[0],
4176 def lowercase_escape(s
):
4177 unicode_escape
= codecs
.getdecoder('unicode_escape')
4179 r
'\\u[0-9a-fA-F]{4}',
4180 lambda m
: unicode_escape(m
.group(0))[0],
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 the text must be UTF-8 bytes before percent-quoting
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # Characters that must survive unescaped (RFC 3986 reserved + unreserved marks)
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
4191 def escape_url(url
):
4192 """Escape URL as suggested by RFC 3986"""
4193 url_parsed
= compat_urllib_parse_urlparse(url
)
4194 return url_parsed
._replace
(
4195 netloc
=url_parsed
.netloc
.encode('idna').decode('ascii'),
4196 path
=escape_rfc3986(url_parsed
.path
),
4197 params
=escape_rfc3986(url_parsed
.params
),
4198 query
=escape_rfc3986(url_parsed
.query
),
4199 fragment
=escape_rfc3986(url_parsed
.fragment
)
4204 return compat_parse_qs(compat_urllib_parse_urlparse(url
).query
)
4207 def read_batch_urls(batch_fd
):
4209 if not isinstance(url
, compat_str
):
4210 url
= url
.decode('utf-8', 'replace')
4211 BOM_UTF8
= ('\xef\xbb\xbf', '\ufeff')
4212 for bom
in BOM_UTF8
:
4213 if url
.startswith(bom
):
4214 url
= url
[len(bom
):]
4216 if not url
or url
.startswith(('#', ';', ']')):
4218 # "#" cannot be stripped out since it is part of the URI
4219 # However, it can be safely stipped out if follwing a whitespace
4220 return re
.split(r
'\s#', url
, 1)[0].rstrip()
4222 with contextlib
.closing(batch_fd
) as fd
:
4223 return [url
for url
in map(fixup
, fd
) if url
]
def urlencode_postdata(*args, **kargs):
    """URL-encode form data and return it as ASCII bytes suitable for a POST body."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
4230 def update_url_query(url
, query
):
4233 parsed_url
= compat_urlparse
.urlparse(url
)
4234 qs
= compat_parse_qs(parsed_url
.query
)
4236 return compat_urlparse
.urlunparse(parsed_url
._replace
(
4237 query
=compat_urllib_parse_urlencode(qs
, True)))
4240 def update_Request(req
, url
=None, data
=None, headers
={}, query={}
):
4241 req_headers
= req
.headers
.copy()
4242 req_headers
.update(headers
)
4243 req_data
= data
or req
.data
4244 req_url
= update_url_query(url
or req
.get_full_url(), query
)
4245 req_get_method
= req
.get_method()
4246 if req_get_method
== 'HEAD':
4247 req_type
= HEADRequest
4248 elif req_get_method
== 'PUT':
4249 req_type
= PUTRequest
4251 req_type
= compat_urllib_request
.Request
4253 req_url
, data
=req_data
, headers
=req_headers
,
4254 origin_req_host
=req
.origin_req_host
, unverifiable
=req
.unverifiable
)
4255 if hasattr(req
, 'timeout'):
4256 new_req
.timeout
= req
.timeout
4260 def _multipart_encode_impl(data
, boundary
):
4261 content_type
= 'multipart/form-data; boundary=%s' % boundary
4264 for k
, v
in data
.items():
4265 out
+= b
'--' + boundary
.encode('ascii') + b
'\r\n'
4266 if isinstance(k
, compat_str
):
4267 k
= k
.encode('utf-8')
4268 if isinstance(v
, compat_str
):
4269 v
= v
.encode('utf-8')
4270 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4271 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
4272 content
= b
'Content-Disposition: form-data; name="' + k
+ b
'"\r\n\r\n' + v
+ b
'\r\n'
4273 if boundary
.encode('ascii') in content
:
4274 raise ValueError('Boundary overlaps with data')
4277 out
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n'
4279 return out
, content_type
4282 def multipart_encode(data
, boundary
=None):
4284 Encode a dict to RFC 7578-compliant form-data
4287 A dict where keys and values can be either Unicode or bytes-like
4290 If specified a Unicode object, it's used as the boundary. Otherwise
4291 a random boundary is generated.
4293 Reference: https://tools.ietf.org/html/rfc7578
4295 has_specified_boundary
= boundary
is not None
4298 if boundary
is None:
4299 boundary
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff))
4302 out
, content_type
= _multipart_encode_impl(data
, boundary
)
4305 if has_specified_boundary
:
4309 return out
, content_type
4312 def dict_get(d
, key_or_keys
, default
=None, skip_false_values
=True):
4313 if isinstance(key_or_keys
, (list, tuple)):
4314 for key
in key_or_keys
:
4315 if key
not in d
or d
[key
] is None or skip_false_values
and not d
[key
]:
4319 return d
.get(key_or_keys
, default
)
4322 def try_get(src
, getter
, expected_type
=None):
4323 for get
in variadic(getter
):
4326 except (AttributeError, KeyError, TypeError, IndexError):
4329 if expected_type
is None or isinstance(v
, expected_type
):
4333 def merge_dicts(*dicts
):
4335 for a_dict
in dicts
:
4336 for k
, v
in a_dict
.items():
4340 or (isinstance(v
, compat_str
) and v
4341 and isinstance(merged
[k
], compat_str
)
4342 and not merged
[k
])):
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Decode *string* into compat_str unless it already is one.

    NOTE: the *encoding* default is evaluated once at definition time
    (original behavior, deliberately preserved).
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4360 TV_PARENTAL_GUIDELINES
= {
4370 def parse_age_limit(s
):
4372 return s
if 0 <= s
<= 21 else None
4373 if not isinstance(s
, compat_basestring
):
4375 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
4377 return int(m
.group('age'))
4380 return US_RATINGS
[s
]
4381 m
= re
.match(r
'^TV[_-]?(%s)$' % '|'.join(k
[3:] for k
in TV_PARENTAL_GUIDELINES
), s
)
4383 return TV_PARENTAL_GUIDELINES
['TV-' + m
.group(1)]
4387 def strip_jsonp(code
):
4390 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4391 (?:\s*&&\s*(?P=func_name))?
4392 \s*\(\s*(?P<callback_data>.*)\);?
4393 \s*?(?://[^\n]*)*$''',
4394 r
'\g<callback_data>', code
)
4397 def js_to_json(code
, vars={}):
4398 # vars is a dict of var, val pairs to substitute
4399 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4400 SKIP_RE
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
)
4402 (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16),
4403 (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8),
4408 if v
in ('true', 'false', 'null'):
4410 elif v
in ('undefined', 'void 0'):
4412 elif v
.startswith('/*') or v
.startswith('//') or v
.startswith('!') or v
== ',':
4415 if v
[0] in ("'", '"'):
4416 v
= re
.sub(r
'(?s)\\.|"', lambda m
: {
4421 }.get(m
.group(0), m
.group(0)), v
[1:-1])
4423 for regex
, base
in INTEGER_TABLE
:
4424 im
= re
.match(regex
, v
)
4426 i
= int(im
.group(1), base
)
4427 return '"%d":' % i
if v
.endswith(':') else '%d' % i
4434 return re
.sub(r
'''(?sx)
4435 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4436 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4437 {comment}|,(?={skip}[\]}}])|
4438 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4439 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4442 '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
)
4445 def qualities(quality_ids
):
4446 """ Get a numeric quality value out of a list of possible values """
4449 return quality_ids
.index(qid
)
4456 'default': '%(title)s [%(id)s].%(ext)s',
4457 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
4463 'description': 'description',
4464 'annotation': 'annotations.xml',
4465 'infojson': 'info.json',
4466 'pl_thumbnail': None,
4467 'pl_description': 'description',
4468 'pl_infojson': 'info.json',
4471 # As of [1] format syntax is:
4472 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4473 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4474 STR_FORMAT_RE_TMPL
= r
'''(?x)
4475 (?<!%)(?P<prefix>(?:%%)*)
4477 (?P<has_key>\((?P<key>{0})\))?
4479 (?P<conversion>[#0\-+ ]+)?
4481 (?P<precision>\.\d+)?
4482 (?P<len_mod>[hlL])? # unused in python
4483 {1} # conversion type
4488 STR_FORMAT_TYPES
= 'diouxXeEfFgGcrs'
4491 def limit_length(s
, length
):
4492 """ Add ellipses to overly long strings """
4497 return s
[:length
- len(ELLIPSES
)] + ELLIPSES
def version_tuple(v):
    """Split a version string on '.' or '-' and return its parts as a tuple of ints."""
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
4505 def is_outdated_version(version
, limit
, assume_new
=True):
4507 return not assume_new
4509 return version_tuple(version
) < version_tuple(limit
)
4511 return not assume_new
4514 def ytdl_is_updateable():
4515 """ Returns if yt-dlp can be updated with -U """
4518 from zipimport
import zipimporter
4520 return isinstance(globals().get('__loader__'), zipimporter
) or hasattr(sys
, 'frozen')
def args_to_str(args):
    """Build a short, shell-quoted single-line representation of a subprocess command."""
    quoted = (compat_shlex_quote(a) for a in args)
    return ' '.join(quoted)
4528 def error_to_compat_str(err
):
4530 # On python 2 error byte string must be decoded with proper
4531 # encoding rather than ascii
4532 if sys
.version_info
[0] < 3:
4533 err_str
= err_str
.decode(preferredencoding())
4537 def mimetype2ext(mt
):
4543 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4544 # it's the most popular one
4545 'audio/mpeg': 'mp3',
4546 'audio/x-wav': 'wav',
4551 _
, _
, res
= mt
.rpartition('/')
4552 res
= res
.split(';')[0].strip().lower()
4556 'smptett+xml': 'tt',
4560 'x-mp4-fragmented': 'mp4',
4561 'x-ms-sami': 'sami',
4564 'x-mpegurl': 'm3u8',
4565 'vnd.apple.mpegurl': 'm3u8',
4569 'vnd.ms-sstr+xml': 'ism',
4576 def parse_codecs(codecs_str
):
4577 # http://tools.ietf.org/html/rfc6381
4580 split_codecs
= list(filter(None, map(
4581 str.strip
, codecs_str
.strip().strip(',').split(','))))
4582 vcodec
, acodec
= None, None
4583 for full_codec
in split_codecs
:
4584 codec
= full_codec
.split('.')[0]
4585 if codec
in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4588 elif codec
in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4592 write_string('WARNING: Unknown codec %s\n' % full_codec
, sys
.stderr
)
4593 if not vcodec
and not acodec
:
4594 if len(split_codecs
) == 2:
4596 'vcodec': split_codecs
[0],
4597 'acodec': split_codecs
[1],
4601 'vcodec': vcodec
or 'none',
4602 'acodec': acodec
or 'none',
4607 def urlhandle_detect_ext(url_handle
):
4608 getheader
= url_handle
.headers
.get
4610 cd
= getheader('Content-Disposition')
4612 m
= re
.match(r
'attachment;\s*filename="(?P<filename>[^"]+)"', cd
)
4614 e
= determine_ext(m
.group('filename'), default_ext
=None)
4618 return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Wrap *data* (bytes) into a base64 'data:' URI carrying *mime_type*."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4625 def age_restricted(content_limit
, age_limit
):
4626 """ Returns True iff the content should be blocked """
4628 if age_limit
is None: # No limit set
4630 if content_limit
is None:
4631 return False # Content available for everyone
4632 return age_limit
< content_limit
4635 def is_html(first_bytes
):
4636 """ Detect whether a file contains HTML by examining its first bytes. """
4639 (b
'\xef\xbb\xbf', 'utf-8'),
4640 (b
'\x00\x00\xfe\xff', 'utf-32-be'),
4641 (b
'\xff\xfe\x00\x00', 'utf-32-le'),
4642 (b
'\xff\xfe', 'utf-16-le'),
4643 (b
'\xfe\xff', 'utf-16-be'),
4645 for bom
, enc
in BOMS
:
4646 if first_bytes
.startswith(bom
):
4647 s
= first_bytes
[len(bom
):].decode(enc
, 'replace')
4650 s
= first_bytes
.decode('utf-8', 'replace')
4652 return re
.match(r
'^\s*<', s
)
4655 def determine_protocol(info_dict
):
4656 protocol
= info_dict
.get('protocol')
4657 if protocol
is not None:
4660 url
= info_dict
['url']
4661 if url
.startswith('rtmp'):
4663 elif url
.startswith('mms'):
4665 elif url
.startswith('rtsp'):
4668 ext
= determine_ext(url
)
4674 return compat_urllib_parse_urlparse(url
).scheme
4677 def render_table(header_row
, data
, delim
=False, extraGap
=0, hideEmpty
=False):
4678 """ Render a list of rows, each as a list of values """
4680 def get_max_lens(table
):
4681 return [max(len(compat_str(v
)) for v
in col
) for col
in zip(*table
)]
4683 def filter_using_list(row
, filterArray
):
4684 return [col
for (take
, col
) in zip(filterArray
, row
) if take
]
4687 max_lens
= get_max_lens(data
)
4688 header_row
= filter_using_list(header_row
, max_lens
)
4689 data
= [filter_using_list(row
, max_lens
) for row
in data
]
4691 table
= [header_row
] + data
4692 max_lens
= get_max_lens(table
)
4694 table
= [header_row
] + [['-' * ml
for ml
in max_lens
]] + data
4695 format_str
= ' '.join('%-' + compat_str(ml
+ extraGap
) + 's' for ml
in max_lens
[:-1]) + ' %s'
4696 return '\n'.join(format_str
% tuple(row
) for row
in table
)
4699 def _match_one(filter_part
, dct
, incomplete
):
4700 # TODO: Generalize code with YoutubeDL._build_format_filter
4701 STRING_OPERATORS
= {
4702 '*=': operator
.contains
,
4703 '^=': lambda attr
, value
: attr
.startswith(value
),
4704 '$=': lambda attr
, value
: attr
.endswith(value
),
4705 '~=': lambda attr
, value
: re
.search(value
, attr
),
4707 COMPARISON_OPERATORS
= {
4709 '<=': operator
.le
, # "<=" must be defined above "<"
4716 operator_rex
= re
.compile(r
'''(?x)\s*
4718 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4720 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4721 (?P<quote>["\'])(?P
<quotedstrval
>.+?
)(?P
=quote
)|
4725 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4726 m = operator_rex.search(filter_part)
4728 unnegated_op = COMPARISON_OPERATORS[m.group('op')]
4729 if m.group('negation'):
4730 op = lambda attr, value: not unnegated_op(attr, value)
4733 actual_value = dct.get(m.group('key'))
4734 if (m.group('quotedstrval') is not None
4735 or m.group('strval') is not None
4736 # If the original field is a string and matching comparisonvalue is
4737 # a number we should respect the origin of the original field
4738 # and process comparison value as a string (see
4739 # https://github.com/ytdl-org/youtube-dl/issues/11082).
4740 or actual_value is not None and m.group('intval') is not None
4741 and isinstance(actual_value, compat_str)):
4742 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4743 quote = m.group('quote')
4744 if quote is not None:
4745 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4747 if m.group('op') in STRING_OPERATORS:
4748 raise ValueError('Operator %s only supports string values!' % m.group('op'))
4750 comparison_value = int(m.group('intval'))
4752 comparison_value = parse_filesize(m.group('intval'))
4753 if comparison_value is None:
4754 comparison_value = parse_filesize(m.group('intval') + 'B')
4755 if comparison_value is None:
4757 'Invalid integer value %r in filter part %r' % (
4758 m.group('intval'), filter_part))
4759 if actual_value is None:
4760 return incomplete or m.group('none_inclusive')
4761 return op(actual_value, comparison_value)
4764 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4765 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4767 operator_rex = re.compile(r'''(?x
)\s
*
4768 (?P
<op
>%s)\s
*(?P
<key
>[a
-z_
]+)
4770 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4771 m = operator_rex.search(filter_part)
4773 op = UNARY_OPERATORS[m.group('op')]
4774 actual_value = dct.get(m.group('key'))
4775 if incomplete and actual_value is None:
4777 return op(actual_value)
4779 raise ValueError('Invalid filter part %r' % filter_part)
4782 def match_str(filter_str, dct, incomplete=False):
4783 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
4784 When incomplete, all conditions passes on missing fields
4787 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
4788 for filter_part in re.split(r'(?<!\\)&', filter_str))
4791 def match_filter_func(filter_str):
4792 def _match_func(info_dict, *args, **kwargs):
4793 if match_str(filter_str, info_dict, *args, **kwargs):
4796 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4797 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4801 def parse_dfxp_time_expr(time_expr):
4805 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4807 return float(mobj.group('time_offset'))
4809 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
4811 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT 'HH:MM:SS,mmm' timecode."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    # %d truncates each float component toward zero
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4818 def dfxp2srt(dfxp_data):
4820 @param dfxp_data A
bytes-like
object containing DFXP data
4821 @returns A
unicode object containing converted SRT data
4823 LEGACY_NAMESPACES = (
4824 (b'http://www.w3.org/ns/ttml', [
4825 b'http://www.w3.org/2004/11/ttaf1',
4826 b'http://www.w3.org/2006/04/ttaf1',
4827 b'http://www.w3.org/2006/10/ttaf1',
4829 (b'http://www.w3.org/ns/ttml#styling', [
4830 b'http://www.w3.org/ns/ttml#style',
4834 SUPPORTED_STYLING = [
4843 _x = functools.partial(xpath_with_ns, ns_map={
4844 'xml': 'http://www.w3.org/XML/1998/namespace',
4845 'ttml': 'http://www.w3.org/ns/ttml',
4846 'tts': 'http://www.w3.org/ns/ttml#styling',
4852 class TTMLPElementParser(object):
4854 _unclosed_elements = []
4855 _applied_styles = []
4857 def start(self, tag, attrib):
4858 if tag in (_x('ttml:br'), 'br'):
4861 unclosed_elements = []
4863 element_style_id = attrib.get('style')
4865 style.update(default_style)
4866 if element_style_id:
4867 style.update(styles.get(element_style_id, {}))
4868 for prop in SUPPORTED_STYLING:
4869 prop_val = attrib.get(_x('tts:' + prop))
4871 style[prop] = prop_val
4874 for k, v in sorted(style.items()):
4875 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4878 font += ' color="%s"' % v
4879 elif k == 'fontSize':
4880 font += ' size="%s"' % v
4881 elif k == 'fontFamily':
4882 font += ' face="%s"' % v
4883 elif k == 'fontWeight' and v == 'bold':
4885 unclosed_elements.append('b')
4886 elif k == 'fontStyle' and v == 'italic':
4888 unclosed_elements.append('i')
4889 elif k == 'textDecoration' and v == 'underline':
4891 unclosed_elements.append('u')
4893 self._out += '<font' + font + '>'
4894 unclosed_elements.append('font')
4896 if self._applied_styles:
4897 applied_style.update(self._applied_styles[-1])
4898 applied_style.update(style)
4899 self._applied_styles.append(applied_style)
4900 self._unclosed_elements.append(unclosed_elements)
4903 if tag not in (_x('ttml:br'), 'br'):
4904 unclosed_elements = self._unclosed_elements.pop()
4905 for element in reversed(unclosed_elements):
4906 self._out += '</%s>' % element
4907 if unclosed_elements and self._applied_styles:
4908 self._applied_styles.pop()
4910 def data(self, data):
4914 return self._out.strip()
4916 def parse_node(node):
4917 target = TTMLPElementParser()
4918 parser = xml.etree.ElementTree.XMLParser(target=target)
4919 parser.feed(xml.etree.ElementTree.tostring(node))
4920 return parser.close()
4922 for k, v in LEGACY_NAMESPACES:
4924 dfxp_data = dfxp_data.replace(ns, k)
4926 dfxp = compat_etree_fromstring(dfxp_data)
4928 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4931 raise ValueError('Invalid dfxp/TTML subtitle')
4935 for style in dfxp.findall(_x('.//ttml:style')):
4936 style_id = style.get('id') or style.get(_x('xml:id'))
4939 parent_style_id = style.get('style')
4941 if parent_style_id not in styles:
4944 styles[style_id] = styles[parent_style_id].copy()
4945 for prop in SUPPORTED_STYLING:
4946 prop_val = style.get(_x('tts:' + prop))
4948 styles.setdefault(style_id, {})[prop] = prop_val
4954 for p in ('body', 'div'):
4955 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4958 style = styles.get(ele.get('style'))
4961 default_style.update(style)
4963 for para, index in zip(paras, itertools.count(1)):
4964 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4965 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4966 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4967 if begin_time is None:
4972 end_time = begin_time + dur
4973 out.append('%d\n%s --> %s\n%s\n\n' % (
4975 srt_subtitles_timecode(begin_time),
4976 srt_subtitles_timecode(end_time),
4982 def cli_option(params, command_option, param):
4983 param = params.get(param)
4985 param = compat_str(param)
4986 return [command_option, param] if param is not None else []
4989 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4990 param = params.get(param)
4993 assert isinstance(param, bool)
4995 return [command_option + separator + (true_value if param else false_value)]
4996 return [command_option, true_value if param else false_value]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit [command_option] when params[param] equals *expected_value*, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
5004 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
5005 if isinstance(argdict, (list, tuple)): # for backward compatibility
5012 assert isinstance(argdict, dict)
5014 assert isinstance(keys, (list, tuple))
5015 for key_list in keys:
5016 arg_list = list(filter(
5017 lambda x: x is not None,
5018 [argdict.get(key.lower()) for key in variadic(key_list)]))
5020 return [arg for args in arg_list for arg in args]
5024 def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5025 main_key, exe = main_key.lower(), exe.lower()
5026 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5027 keys = [f'{root_key}{k}' for k in (keys or [''])]
5028 if root_key in keys:
5030 keys.append((main_key, exe))
5031 keys.append('default')
5034 return cli_configuration_args(argdict, keys, default, use_compat)
5037 class ISO639Utils(object):
5038 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5097 'iw': 'heb', # Replaced by he in 1989 revision
5107 'in': 'ind', # Replaced by id in 1989 revision
5222 'ji': 'yid', # Replaced by yi in 1989 revision
5230 def short2long(cls, code):
5231 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5232 return cls._lang_map.get(code[:2])
5235 def long2short(cls, code):
5236 """Convert language code from ISO 639-2/T to ISO 639-1"""
5237 for short_name, long_name in cls._lang_map.items():
5238 if long_name == code:
5242 class ISO3166Utils(object):
5243 # From http://data.okfn.org/data/core/country-list
5245 'AF': 'Afghanistan',
5246 'AX': 'Åland Islands',
5249 'AS': 'American Samoa',
5254 'AG': 'Antigua and Barbuda',
5271 'BO': 'Bolivia, Plurinational State of',
5272 'BQ': 'Bonaire, Sint Eustatius and Saba',
5273 'BA': 'Bosnia and Herzegovina',
5275 'BV': 'Bouvet Island',
5277 'IO': 'British Indian Ocean Territory',
5278 'BN': 'Brunei Darussalam',
5280 'BF': 'Burkina Faso',
5286 'KY': 'Cayman Islands',
5287 'CF': 'Central African Republic',
5291 'CX': 'Christmas Island',
5292 'CC': 'Cocos (Keeling) Islands',
5296 'CD': 'Congo, the Democratic Republic of the',
5297 'CK': 'Cook Islands',
5299 'CI': 'Côte d\'Ivoire',
5304 'CZ': 'Czech Republic',
5308 'DO': 'Dominican Republic',
5311 'SV': 'El Salvador',
5312 'GQ': 'Equatorial Guinea',
5316 'FK': 'Falkland Islands (Malvinas)',
5317 'FO': 'Faroe Islands',
5321 'GF': 'French Guiana',
5322 'PF': 'French Polynesia',
5323 'TF': 'French Southern Territories',
5338 'GW': 'Guinea-Bissau',
5341 'HM': 'Heard Island and McDonald Islands',
5342 'VA': 'Holy See (Vatican City State)',
5349 'IR': 'Iran, Islamic Republic of',
5352 'IM': 'Isle of Man',
5362 'KP': 'Korea, Democratic People\'s Republic of',
5363 'KR': 'Korea, Republic of',
5366 'LA': 'Lao People\'s Democratic Republic',
5372 'LI': 'Liechtenstein',
5376 'MK': 'Macedonia, the Former Yugoslav Republic of',
5383 'MH': 'Marshall Islands',
5389 'FM': 'Micronesia, Federated States of',
5390 'MD': 'Moldova, Republic of',
5401 'NL': 'Netherlands',
5402 'NC': 'New Caledonia',
5403 'NZ': 'New Zealand',
5408 'NF': 'Norfolk Island',
5409 'MP': 'Northern Mariana Islands',
5414 'PS': 'Palestine, State of',
5416 'PG': 'Papua New Guinea',
5419 'PH': 'Philippines',
5423 'PR': 'Puerto Rico',
5427 'RU': 'Russian Federation',
5429 'BL': 'Saint Barthélemy',
5430 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5431 'KN': 'Saint Kitts and Nevis',
5432 'LC': 'Saint Lucia',
5433 'MF': 'Saint Martin (French part)',
5434 'PM': 'Saint Pierre and Miquelon',
5435 'VC': 'Saint Vincent and the Grenadines',
5438 'ST': 'Sao Tome and Principe',
5439 'SA': 'Saudi Arabia',
5443 'SL': 'Sierra Leone',
5445 'SX': 'Sint Maarten (Dutch part)',
5448 'SB': 'Solomon Islands',
5450 'ZA': 'South Africa',
5451 'GS': 'South Georgia and the South Sandwich Islands',
5452 'SS': 'South Sudan',
5457 'SJ': 'Svalbard and Jan Mayen',
5460 'CH': 'Switzerland',
5461 'SY': 'Syrian Arab Republic',
5462 'TW': 'Taiwan, Province of China',
5464 'TZ': 'Tanzania, United Republic of',
5466 'TL': 'Timor-Leste',
5470 'TT': 'Trinidad and Tobago',
5473 'TM': 'Turkmenistan',
5474 'TC': 'Turks and Caicos Islands',
5478 'AE': 'United Arab Emirates',
5479 'GB': 'United Kingdom',
5480 'US': 'United States',
5481 'UM': 'United States Minor Outlying Islands',
5485 'VE': 'Venezuela, Bolivarian Republic of',
5487 'VG': 'Virgin Islands, British',
5488 'VI': 'Virgin Islands, U.S.',
5489 'WF': 'Wallis and Futuna',
5490 'EH': 'Western Sahara',
5497 def short2full(cls, code):
5498 """Convert an ISO 3166-2 country code to the corresponding full name"""
5499 return cls._country_map.get(code.upper())
5502 class GeoUtils(object):
5503 # Major IPv4 address blocks per country
5505 'AD': '46.172.224.0/19',
5506 'AE': '94.200.0.0/13',
5507 'AF': '149.54.0.0/17',
5508 'AG': '209.59.64.0/18',
5509 'AI': '204.14.248.0/21',
5510 'AL': '46.99.0.0/16',
5511 'AM': '46.70.0.0/15',
5512 'AO': '105.168.0.0/13',
5513 'AP': '182.50.184.0/21',
5514 'AQ': '23.154.160.0/24',
5515 'AR': '181.0.0.0/12',
5516 'AS': '202.70.112.0/20',
5517 'AT': '77.116.0.0/14',
5518 'AU': '1.128.0.0/11',
5519 'AW': '181.41.0.0/18',
5520 'AX': '185.217.4.0/22',
5521 'AZ': '5.197.0.0/16',
5522 'BA': '31.176.128.0/17',
5523 'BB': '65.48.128.0/17',
5524 'BD': '114.130.0.0/16',
5526 'BF': '102.178.0.0/15',
5527 'BG': '95.42.0.0/15',
5528 'BH': '37.131.0.0/17',
5529 'BI': '154.117.192.0/18',
5530 'BJ': '137.255.0.0/16',
5531 'BL': '185.212.72.0/23',
5532 'BM': '196.12.64.0/18',
5533 'BN': '156.31.0.0/16',
5534 'BO': '161.56.0.0/16',
5535 'BQ': '161.0.80.0/20',
5536 'BR': '191.128.0.0/12',
5537 'BS': '24.51.64.0/18',
5538 'BT': '119.2.96.0/19',
5539 'BW': '168.167.0.0/16',
5540 'BY': '178.120.0.0/13',
5541 'BZ': '179.42.192.0/18',
5542 'CA': '99.224.0.0/11',
5543 'CD': '41.243.0.0/16',
5544 'CF': '197.242.176.0/21',
5545 'CG': '160.113.0.0/16',
5546 'CH': '85.0.0.0/13',
5547 'CI': '102.136.0.0/14',
5548 'CK': '202.65.32.0/19',
5549 'CL': '152.172.0.0/14',
5550 'CM': '102.244.0.0/14',
5551 'CN': '36.128.0.0/10',
5552 'CO': '181.240.0.0/12',
5553 'CR': '201.192.0.0/12',
5554 'CU': '152.206.0.0/15',
5555 'CV': '165.90.96.0/19',
5556 'CW': '190.88.128.0/17',
5557 'CY': '31.153.0.0/16',
5558 'CZ': '88.100.0.0/14',
5560 'DJ': '197.241.0.0/17',
5561 'DK': '87.48.0.0/12',
5562 'DM': '192.243.48.0/20',
5563 'DO': '152.166.0.0/15',
5564 'DZ': '41.96.0.0/12',
5565 'EC': '186.68.0.0/15',
5566 'EE': '90.190.0.0/15',
5567 'EG': '156.160.0.0/11',
5568 'ER': '196.200.96.0/20',
5569 'ES': '88.0.0.0/11',
5570 'ET': '196.188.0.0/14',
5571 'EU': '2.16.0.0/13',
5572 'FI': '91.152.0.0/13',
5573 'FJ': '144.120.0.0/16',
5574 'FK': '80.73.208.0/21',
5575 'FM': '119.252.112.0/20',
5576 'FO': '88.85.32.0/19',
5578 'GA': '41.158.0.0/15',
5580 'GD': '74.122.88.0/21',
5581 'GE': '31.146.0.0/16',
5582 'GF': '161.22.64.0/18',
5583 'GG': '62.68.160.0/19',
5584 'GH': '154.160.0.0/12',
5585 'GI': '95.164.0.0/16',
5586 'GL': '88.83.0.0/19',
5587 'GM': '160.182.0.0/15',
5588 'GN': '197.149.192.0/18',
5589 'GP': '104.250.0.0/19',
5590 'GQ': '105.235.224.0/20',
5591 'GR': '94.64.0.0/13',
5592 'GT': '168.234.0.0/16',
5593 'GU': '168.123.0.0/16',
5594 'GW': '197.214.80.0/20',
5595 'GY': '181.41.64.0/18',
5596 'HK': '113.252.0.0/14',
5597 'HN': '181.210.0.0/16',
5598 'HR': '93.136.0.0/13',
5599 'HT': '148.102.128.0/17',
5600 'HU': '84.0.0.0/14',
5601 'ID': '39.192.0.0/10',
5602 'IE': '87.32.0.0/12',
5603 'IL': '79.176.0.0/13',
5604 'IM': '5.62.80.0/20',
5605 'IN': '117.192.0.0/10',
5606 'IO': '203.83.48.0/21',
5607 'IQ': '37.236.0.0/14',
5608 'IR': '2.176.0.0/12',
5609 'IS': '82.221.0.0/16',
5610 'IT': '79.0.0.0/10',
5611 'JE': '87.244.64.0/18',
5612 'JM': '72.27.0.0/17',
5613 'JO': '176.29.0.0/16',
5614 'JP': '133.0.0.0/8',
5615 'KE': '105.48.0.0/12',
5616 'KG': '158.181.128.0/17',
5617 'KH': '36.37.128.0/17',
5618 'KI': '103.25.140.0/22',
5619 'KM': '197.255.224.0/20',
5620 'KN': '198.167.192.0/19',
5621 'KP': '175.45.176.0/22',
5622 'KR': '175.192.0.0/10',
5623 'KW': '37.36.0.0/14',
5624 'KY': '64.96.0.0/15',
5625 'KZ': '2.72.0.0/13',
5626 'LA': '115.84.64.0/18',
5627 'LB': '178.135.0.0/16',
5628 'LC': '24.92.144.0/20',
5629 'LI': '82.117.0.0/19',
5630 'LK': '112.134.0.0/15',
5631 'LR': '102.183.0.0/16',
5632 'LS': '129.232.0.0/17',
5633 'LT': '78.56.0.0/13',
5634 'LU': '188.42.0.0/16',
5635 'LV': '46.109.0.0/16',
5636 'LY': '41.252.0.0/14',
5637 'MA': '105.128.0.0/11',
5638 'MC': '88.209.64.0/18',
5639 'MD': '37.246.0.0/16',
5640 'ME': '178.175.0.0/17',
5641 'MF': '74.112.232.0/21',
5642 'MG': '154.126.0.0/17',
5643 'MH': '117.103.88.0/21',
5644 'MK': '77.28.0.0/15',
5645 'ML': '154.118.128.0/18',
5646 'MM': '37.111.0.0/17',
5647 'MN': '49.0.128.0/17',
5648 'MO': '60.246.0.0/16',
5649 'MP': '202.88.64.0/20',
5650 'MQ': '109.203.224.0/19',
5651 'MR': '41.188.64.0/18',
5652 'MS': '208.90.112.0/22',
5653 'MT': '46.11.0.0/16',
5654 'MU': '105.16.0.0/12',
5655 'MV': '27.114.128.0/18',
5656 'MW': '102.70.0.0/15',
5657 'MX': '187.192.0.0/11',
5658 'MY': '175.136.0.0/13',
5659 'MZ': '197.218.0.0/15',
5660 'NA': '41.182.0.0/16',
5661 'NC': '101.101.0.0/18',
5662 'NE': '197.214.0.0/18',
5663 'NF': '203.17.240.0/22',
5664 'NG': '105.112.0.0/12',
5665 'NI': '186.76.0.0/15',
5666 'NL': '145.96.0.0/11',
5667 'NO': '84.208.0.0/13',
5668 'NP': '36.252.0.0/15',
5669 'NR': '203.98.224.0/19',
5670 'NU': '49.156.48.0/22',
5671 'NZ': '49.224.0.0/14',
5672 'OM': '5.36.0.0/15',
5673 'PA': '186.72.0.0/15',
5674 'PE': '186.160.0.0/14',
5675 'PF': '123.50.64.0/18',
5676 'PG': '124.240.192.0/19',
5677 'PH': '49.144.0.0/13',
5678 'PK': '39.32.0.0/11',
5679 'PL': '83.0.0.0/11',
5680 'PM': '70.36.0.0/20',
5681 'PR': '66.50.0.0/16',
5682 'PS': '188.161.0.0/16',
5683 'PT': '85.240.0.0/13',
5684 'PW': '202.124.224.0/20',
5685 'PY': '181.120.0.0/14',
5686 'QA': '37.210.0.0/15',
5687 'RE': '102.35.0.0/16',
5688 'RO': '79.112.0.0/13',
5689 'RS': '93.86.0.0/15',
5690 'RU': '5.136.0.0/13',
5691 'RW': '41.186.0.0/16',
5692 'SA': '188.48.0.0/13',
5693 'SB': '202.1.160.0/19',
5694 'SC': '154.192.0.0/11',
5695 'SD': '102.120.0.0/13',
5696 'SE': '78.64.0.0/12',
5697 'SG': '8.128.0.0/10',
5698 'SI': '188.196.0.0/14',
5699 'SK': '78.98.0.0/15',
5700 'SL': '102.143.0.0/17',
5701 'SM': '89.186.32.0/19',
5702 'SN': '41.82.0.0/15',
5703 'SO': '154.115.192.0/18',
5704 'SR': '186.179.128.0/17',
5705 'SS': '105.235.208.0/21',
5706 'ST': '197.159.160.0/19',
5707 'SV': '168.243.0.0/16',
5708 'SX': '190.102.0.0/20',
5710 'SZ': '41.84.224.0/19',
5711 'TC': '65.255.48.0/20',
5712 'TD': '154.68.128.0/19',
5713 'TG': '196.168.0.0/14',
5714 'TH': '171.96.0.0/13',
5715 'TJ': '85.9.128.0/18',
5716 'TK': '27.96.24.0/21',
5717 'TL': '180.189.160.0/20',
5718 'TM': '95.85.96.0/19',
5719 'TN': '197.0.0.0/11',
5720 'TO': '175.176.144.0/21',
5721 'TR': '78.160.0.0/11',
5722 'TT': '186.44.0.0/15',
5723 'TV': '202.2.96.0/19',
5724 'TW': '120.96.0.0/11',
5725 'TZ': '156.156.0.0/14',
5726 'UA': '37.52.0.0/14',
5727 'UG': '102.80.0.0/13',
5729 'UY': '167.56.0.0/13',
5730 'UZ': '84.54.64.0/18',
5731 'VA': '212.77.0.0/19',
5732 'VC': '207.191.240.0/21',
5733 'VE': '186.88.0.0/13',
5734 'VG': '66.81.192.0/20',
5735 'VI': '146.226.0.0/16',
5736 'VN': '14.160.0.0/11',
5737 'VU': '202.80.32.0/20',
5738 'WF': '117.20.32.0/21',
5739 'WS': '202.4.32.0/19',
5740 'YE': '134.35.0.0/16',
5741 'YT': '41.242.116.0/22',
5742 'ZA': '41.0.0.0/11',
5743 'ZM': '102.144.0.0/13',
5744 'ZW': '102.177.192.0/18',
5748 def random_ipv4(cls, code_or_block):
5749 if len(code_or_block) == 2:
5750 block = cls._country_ip_map.get(code_or_block.upper())
5754 block = code_or_block
5755 addr, preflen = block.split('/')
5756 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5757 addr_max = addr_min | (0xffffffff >> int(preflen))
5758 return compat_str(socket.inet_ntoa(
5759 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header.

    The special value '__noproxy__' disables proxying for that request;
    SOCKS proxies are signalled to the http(s) handlers via the
    'Ytdl-socks-proxy' header instead of being opened here.
    """

    def __init__(self, proxies=None):
        # Set default handlers routing through proxy_open.
        # FIX: loop variable renamed from `type` to avoid shadowing the builtin
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy overrides the handler default
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5787 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5788 # released into Public Domain
5789 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n <= 0:
        # matches the old struct-pack loop, which yielded one NUL byte for n == 0
        s = b'\000'
    else:
        # int.to_bytes replaces the old quadratic pack-and-prepend loop
        # plus the manual leading-zero strip
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes replaces the manual zero-padding and
    # 4-byte-chunk accumulation loop; b'' yields 0 as before
    return int.from_bytes(s, 'big')
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # int.from_bytes(..., 'little') == int(binascii.hexlify(data[::-1]), 16),
    # but also handles empty input (-> 0) instead of raising ValueError
    payload = int.from_bytes(bytes(data), 'little')
    encrypted = pow(payload, exponent, modulus)
    return '%x' % encrypted
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # FIX: PKCS#1 v1.5 (RFC 8017, EME-PKCS1-v1_5) requires the padding
    # string PS to consist of NON-zero octets; randint(0, 254) could emit
    # a 0, which would prematurely terminate the padding on decode.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer *num* in base *n*.

    A custom digit *table* may be supplied; otherwise the first *n*
    characters of 0-9a-zA-Z are used. Raises ValueError when the base
    exceeds the table length.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
def decode_packed_codes(code):
    """Decode JavaScript obfuscated with Dean Edwards' p.a.c.k.e.r."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Map each base-n token back to its original symbol
    symbol_table = {}
    while count:
        count -= 1
        base_n_count = encode_base_n(count, base)
        symbol_table[base_n_count] = symbols[count] or base_n_count

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
def caesar(s, alphabet, shift):
    """Apply a Caesar shift to *s* over *alphabet*.

    Characters outside *alphabet* are left untouched; shift 0 is a no-op.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(c):
        pos = alphabet.find(c)
        return c if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(map(rotate, s))
5916 return caesar(s
, r
'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list string into a dict.

    Values may be double-quoted (quotes are stripped) or bare.
    """
    info = {}
    for key, val in re.findall(
            r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's `>>>` for integers)."""
    if val < 0:
        val += 0x100000000
    return val >> n
5932 # Based on png2str() written by @gdkchan and improved by @yokrysty
5933 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5934 def decode_png(png_data
):
5935 # Reference: https://www.w3.org/TR/PNG/
5936 header
= png_data
[8:]
5938 if png_data
[:8] != b
'\x89PNG\x0d\x0a\x1a\x0a' or header
[4:8] != b
'IHDR':
5939 raise IOError('Not a valid PNG file.')
5941 int_map
= {1: '>B', 2: '>H', 4: '>I'}
5942 unpack_integer
= lambda x
: compat_struct_unpack(int_map
[len(x
)], x
)[0]
5947 length
= unpack_integer(header
[:4])
5950 chunk_type
= header
[:4]
5953 chunk_data
= header
[:length
]
5954 header
= header
[length
:]
5956 header
= header
[4:] # Skip CRC
5964 ihdr
= chunks
[0]['data']
5966 width
= unpack_integer(ihdr
[:4])
5967 height
= unpack_integer(ihdr
[4:8])
5971 for chunk
in chunks
:
5972 if chunk
['type'] == b
'IDAT':
5973 idat
+= chunk
['data']
5976 raise IOError('Unable to read PNG data.')
5978 decompressed_data
= bytearray(zlib
.decompress(idat
))
5983 def _get_pixel(idx
):
5988 for y
in range(height
):
5989 basePos
= y
* (1 + stride
)
5990 filter_type
= decompressed_data
[basePos
]
5994 pixels
.append(current_row
)
5996 for x
in range(stride
):
5997 color
= decompressed_data
[1 + basePos
+ x
]
5998 basex
= y
* stride
+ x
6003 left
= _get_pixel(basex
- 3)
6005 up
= _get_pixel(basex
- stride
)
6007 if filter_type
== 1: # Sub
6008 color
= (color
+ left
) & 0xff
6009 elif filter_type
== 2: # Up
6010 color
= (color
+ up
) & 0xff
6011 elif filter_type
== 3: # Average
6012 color
= (color
+ ((left
+ up
) >> 1)) & 0xff
6013 elif filter_type
== 4: # Paeth
6019 c
= _get_pixel(basex
- stride
- 3)
6027 if pa
<= pb
and pa
<= pc
:
6028 color
= (color
+ a
) & 0xff
6030 color
= (color
+ b
) & 0xff
6032 color
= (color
+ c
) & 0xff
6034 current_row
.append(color
)
6036 return width
, height
, pixels
6039 def write_xattr(path
, key
, value
):
6040 # This mess below finds the best xattr tool for the job
6042 # try the pyxattr module...
6045 if hasattr(xattr
, 'set'): # pyxattr
6046 # Unicode arguments are not supported in python-pyxattr until
6048 # See https://github.com/ytdl-org/youtube-dl/issues/5498
6049 pyxattr_required_version
= '0.5.0'
6050 if version_tuple(xattr
.__version
__) < version_tuple(pyxattr_required_version
):
6051 # TODO: fallback to CLI tools
6052 raise XAttrUnavailableError(
6053 'python-pyxattr is detected but is too old. '
6054 'yt-dlp requires %s or above while your version is %s. '
6055 'Falling back to other xattr implementations' % (
6056 pyxattr_required_version
, xattr
.__version
__))
6058 setxattr
= xattr
.set
6060 setxattr
= xattr
.setxattr
6063 setxattr(path
, key
, value
)
6064 except EnvironmentError as e
:
6065 raise XAttrMetadataError(e
.errno
, e
.strerror
)
6068 if compat_os_name
== 'nt':
6069 # Write xattrs to NTFS Alternate Data Streams:
6070 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6071 assert ':' not in key
6072 assert os
.path
.exists(path
)
6074 ads_fn
= path
+ ':' + key
6076 with open(ads_fn
, 'wb') as f
:
6078 except EnvironmentError as e
:
6079 raise XAttrMetadataError(e
.errno
, e
.strerror
)
6081 user_has_setfattr
= check_executable('setfattr', ['--version'])
6082 user_has_xattr
= check_executable('xattr', ['-h'])
6084 if user_has_setfattr
or user_has_xattr
:
6086 value
= value
.decode('utf-8')
6087 if user_has_setfattr
:
6088 executable
= 'setfattr'
6089 opts
= ['-n', key
, '-v', value
]
6090 elif user_has_xattr
:
6091 executable
= 'xattr'
6092 opts
= ['-w', key
, value
]
6094 cmd
= ([encodeFilename(executable
, True)]
6095 + [encodeArgument(o
) for o
in opts
]
6096 + [encodeFilename(path
, True)])
6099 p
= subprocess
.Popen(
6100 cmd
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, stdin
=subprocess
.PIPE
)
6101 except EnvironmentError as e
:
6102 raise XAttrMetadataError(e
.errno
, e
.strerror
)
6103 stdout
, stderr
= process_communicate_or_kill(p
)
6104 stderr
= stderr
.decode('utf-8', 'replace')
6105 if p
.returncode
!= 0:
6106 raise XAttrMetadataError(p
.returncode
, stderr
)
6109 # On Unix, and can't find pyxattr, setfattr, or xattr.
6110 if sys
.platform
.startswith('linux'):
6111 raise XAttrUnavailableError(
6112 "Couldn't find a tool to set the xattrs. "
6113 "Install either the python 'pyxattr' or 'xattr' "
6114 "modules, or the GNU 'attr' package "
6115 "(which contains the 'setfattr' tool).")
6117 raise XAttrUnavailableError(
6118 "Couldn't find a tool to set the xattrs. "
6119 "Install either the python 'xattr' module, "
6120 "or the 'xattr' binary.")
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the components of a
    random date between 1950-01-01 and 1995-12-31 (as decimal strings)."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    chosen = first + datetime.timedelta(random.randint(0, (last - first).days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
6135 # Templates for internet shortcut files, which are plain text files.
6136 DOT_URL_LINK_TEMPLATE
= '''
6141 DOT_WEBLOC_LINK_TEMPLATE
= '''
6142 <?xml version="1.0" encoding="UTF-8"?>
6143 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6144 <plist version="1.0">
6147 \t<string>%(url)s</string>
6152 DOT_DESKTOP_LINK_TEMPLATE
= '''
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """
    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
def to_high_limit_path(path):
    """Prefix *path* with \\\\?\\ on Windows to lift the MAX_PATH limit.

    On other platforms the path is returned unchanged.
    """
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # Work around MAX_PATH limitation on Windows. The maximum allowed length
    # for individual path segments may still be quite limited.
    return '\\\\?\\' + os.path.abspath(path)
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch a value (*obj* itself, or *obj*[*field*]), optionally transform
    it with *func*, and interpolate it into *template*.

    Values contained in *ignore* (before or after *func*) yield *default*.
    """
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    # Re-check after func: a transformed value may itself be ignorable
    return default if val in ignore else template % val
6224 def clean_podcast_url(url
):
6225 return re
.sub(r
'''(?x)
6229 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6232 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6235 cn\.co| # https://podcorn.com/analytics-prefix/
6236 st\.fm # https://podsights.com/docs/
6241 _HEX_TABLE
= '0123456789abcdef'
def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID string.

    FIX: the previous implementation filled the variant nibble (the 'y'
    position, which must be one of 8/9/a/b) with an arbitrary hex digit;
    uuid.uuid4() sets the version and variant bits correctly.
    """
    import uuid
    return str(uuid.uuid4())
def make_dir(path, to_screen=None):
    """Ensure the parent directory of *path* exists.

    Returns True on success, False on failure; failures are optionally
    reported through the *to_screen* callable.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # FIX: was `if callable(to_screen) is not None:`, which is always
        # true (callable() returns a bool) and crashed when to_screen=None
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
def get_executable_path():
    """Return the absolute base directory the program is being run from."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
6271 def load_plugins(name
, suffix
, namespace
):
6272 plugin_info
= [None]
6275 plugin_info
= imp
.find_module(
6276 name
, [os
.path
.join(get_executable_path(), 'ytdlp_plugins')])
6277 plugins
= imp
.load_module(name
, *plugin_info
)
6278 for name
in dir(plugins
):
6279 if name
in namespace
:
6281 if not name
.endswith(suffix
):
6283 klass
= getattr(plugins
, name
)
6284 classes
.append(klass
)
6285 namespace
[name
] = klass
6289 if plugin_info
[0] is not None:
6290 plugin_info
[0].close()
6295 obj
, *path_list
, default
=None, expected_type
=None, get_all
=True,
6296 casesense
=True, is_user_input
=False, traverse_string
=False):
6297 ''' Traverse nested list/dict/tuple
6298 @param path_list A list of paths which are checked one by one.
6299 Each path is a list of keys where each key is a string,
6300 a tuple of strings or "...". When a tuple is given,
6301 all the keys given in the tuple are traversed, and
6302 "..." traverses all the keys in the object
6303 @param default Default value to return
6304 @param expected_type Only accept final value of this type (Can also be any callable)
6305 @param get_all Return all the values obtained from a path or only the first one
6306 @param casesense Whether to consider dictionary keys as case sensitive
6307 @param is_user_input Whether the keys are generated from user input. If True,
6308 strings are converted to int/slice if necessary
6309 @param traverse_string Whether to traverse inside strings. If True, any
6310 non-compatible object will also be converted into a string
6314 _lower
= lambda k
: (k
.lower() if isinstance(k
, str) else k
)
6315 path_list
= (map(_lower
, variadic(path
)) for path
in path_list
)
6317 def _traverse_obj(obj
, path
, _current_depth
=0):
6321 path
= tuple(variadic(path
))
6322 for i
, key
in enumerate(path
):
6323 if isinstance(key
, (list, tuple)):
6324 obj
= [_traverse_obj(obj
, sub_key
, _current_depth
) for sub_key
in key
]
6327 obj
= (obj
.values() if isinstance(obj
, dict)
6328 else obj
if isinstance(obj
, (list, tuple, LazyList
))
6329 else str(obj
) if traverse_string
else [])
6331 depth
= max(depth
, _current_depth
)
6332 return [_traverse_obj(inner_obj
, path
[i
+ 1:], _current_depth
) for inner_obj
in obj
]
6333 elif isinstance(obj
, dict) and not (is_user_input
and key
== ':'):
6334 obj
= (obj
.get(key
) if casesense
or (key
in obj
)
6335 else next((v
for k
, v
in obj
.items() if _lower(k
) == key
), None))
6338 key
= (int_or_none(key
) if ':' not in key
6339 else slice(*map(int_or_none
, key
.split(':'))))
6340 if key
== slice(None):
6341 return _traverse_obj(obj
, (..., *path
[i
+ 1:]), _current_depth
)
6342 if not isinstance(key
, (int, slice)):
6344 if not isinstance(obj
, (list, tuple, LazyList
)):
6345 if not traverse_string
:
6354 if isinstance(expected_type
, type):
6355 type_test
= lambda val
: val
if isinstance(val
, expected_type
) else None
6356 elif expected_type
is not None:
6357 type_test
= expected_type
6359 type_test
= lambda val
: val
6361 for path
in path_list
:
6363 val
= _traverse_obj(obj
, path
)
6366 for _
in range(depth
- 1):
6367 val
= itertools
.chain
.from_iterable(v
for v
in val
if v
is not None)
6368 val
= [v
for v
in map(type_test
, val
) if v
is not None]
6370 return val
if get_all
else val
[0]
6372 val
= type_test(val
)
def traverse_dict(dictn, keys, casesense=True):
    """Deprecated wrapper around traverse_obj, kept for backward
    compatibility. Do not use."""
    return traverse_obj(
        dictn, keys,
        casesense=casesense, is_user_input=True, traverse_string=True)
def variadic(x, allowed_types=(str, bytes)):
    """Return *x* itself when it is a non-atomic iterable, else the 1-tuple (x,).

    Strings/bytes (or any of *allowed_types*) are treated as atomic values.
    """
    is_sequence = (
        isinstance(x, collections.abc.Iterable)
        and not isinstance(x, allowed_types))
    return x if is_sequence else (x,)
6388 # create a JSON Web Signature (jws) with HS256 algorithm
6389 # the resulting format is in JWS Compact Serialization
6390 # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
6391 # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
6392 def jwt_encode_hs256(payload_data
, key
, headers
={}):
6398 header_data
.update(headers
)
6399 header_b64
= base64
.b64encode(json
.dumps(header_data
).encode('utf-8'))
6400 payload_b64
= base64
.b64encode(json
.dumps(payload_data
).encode('utf-8'))
6401 h
= hmac
.new(key
.encode('utf-8'), header_b64
+ b
'.' + payload_b64
, hashlib
.sha256
)
6402 signature_b64
= base64
.b64encode(h
.digest())
6403 token
= header_b64
+ b
'.' + payload_b64
+ b
'.' + signature_b64