yt_dlp/utils.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import imp
  20 import io
  21 import itertools
  22 import json
  23 import locale
  24 import math
  25 import operator
  26 import os
  27 import platform
  28 import random
  29 import re
  30 import socket
  31 import ssl
  32 import subprocess
  33 import sys
  34 import tempfile
  35 import time
  36 import traceback
  37 import xml.etree.ElementTree
  38 import zlib
  39
  40 from .compat import (
  41     compat_HTMLParseError,
  42     compat_HTMLParser,
  43     compat_HTTPError,
  44     compat_basestring,
  45     compat_chr,
  46     compat_cookiejar,
  47     compat_ctypes_WINFUNCTYPE,
  48     compat_etree_fromstring,
  49     compat_expanduser,
  50     compat_html_entities,
  51     compat_html_entities_html5,
  52     compat_http_client,
  53     compat_integer_types,
  54     compat_numeric_types,
  55     compat_kwargs,
  56     compat_os_name,
  57     compat_parse_qs,
  58     compat_shlex_quote,
  59     compat_str,
  60     compat_struct_pack,
  61     compat_struct_unpack,
  62     compat_urllib_error,
  63     compat_urllib_parse,
  64     compat_urllib_parse_urlencode,
  65     compat_urllib_parse_urlparse,
  66     compat_urllib_parse_urlunparse,
  67     compat_urllib_parse_quote,
  68     compat_urllib_parse_quote_plus,
  69     compat_urllib_parse_unquote_plus,
  70     compat_urllib_request,
  71     compat_urlparse,
  72     compat_xpath,
  73 )
  74
  75 from .socks import (
  76     ProxyType,
  77     sockssocket,
  78 )
  79
  80
  81 def register_socks_protocols():
  82     # "Register" SOCKS protocols
  83     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  84     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  85     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  86         if scheme not in compat_urlparse.uses_netloc:
  87             compat_urlparse.uses_netloc.append(scheme)
  88
  89
  90 # This is not clearly defined otherwise
  91 compiled_regex_type = type(re.compile(''))
  92
  93
  94 def random_user_agent():
  95     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  96     _CHROME_VERSIONS = (
  97         '74.0.3729.129',
  98         '76.0.3780.3',
  99         '76.0.3780.2',
 100         '74.0.3729.128',
 101         '76.0.3780.1',
 102         '76.0.3780.0',
 103         '75.0.3770.15',
 104         '74.0.3729.127',
 105         '74.0.3729.126',
 106         '76.0.3779.1',
 107         '76.0.3779.0',
 108         '75.0.3770.14',
 109         '74.0.3729.125',
 110         '76.0.3778.1',
 111         '76.0.3778.0',
 112         '75.0.3770.13',
 113         '74.0.3729.124',
 114         '74.0.3729.123',
 115         '73.0.3683.121',
 116         '76.0.3777.1',
 117         '76.0.3777.0',
 118         '75.0.3770.12',
 119         '74.0.3729.122',
 120         '76.0.3776.4',
 121         '75.0.3770.11',
 122         '74.0.3729.121',
 123         '76.0.3776.3',
 124         '76.0.3776.2',
 125         '73.0.3683.120',
 126         '74.0.3729.120',
 127         '74.0.3729.119',
 128         '74.0.3729.118',
 129         '76.0.3776.1',
 130         '76.0.3776.0',
 131         '76.0.3775.5',
 132         '75.0.3770.10',
 133         '74.0.3729.117',
 134         '76.0.3775.4',
 135         '76.0.3775.3',
 136         '74.0.3729.116',
 137         '75.0.3770.9',
 138         '76.0.3775.2',
 139         '76.0.3775.1',
 140         '76.0.3775.0',
 141         '75.0.3770.8',
 142         '74.0.3729.115',
 143         '74.0.3729.114',
 144         '76.0.3774.1',
 145         '76.0.3774.0',
 146         '75.0.3770.7',
 147         '74.0.3729.113',
 148         '74.0.3729.112',
 149         '74.0.3729.111',
 150         '76.0.3773.1',
 151         '76.0.3773.0',
 152         '75.0.3770.6',
 153         '74.0.3729.110',
 154         '74.0.3729.109',
 155         '76.0.3772.1',
 156         '76.0.3772.0',
 157         '75.0.3770.5',
 158         '74.0.3729.108',
 159         '74.0.3729.107',
 160         '76.0.3771.1',
 161         '76.0.3771.0',
 162         '75.0.3770.4',
 163         '74.0.3729.106',
 164         '74.0.3729.105',
 165         '75.0.3770.3',
 166         '74.0.3729.104',
 167         '74.0.3729.103',
 168         '74.0.3729.102',
 169         '75.0.3770.2',
 170         '74.0.3729.101',
 171         '75.0.3770.1',
 172         '75.0.3770.0',
 173         '74.0.3729.100',
 174         '75.0.3769.5',
 175         '75.0.3769.4',
 176         '74.0.3729.99',
 177         '75.0.3769.3',
 178         '75.0.3769.2',
 179         '75.0.3768.6',
 180         '74.0.3729.98',
 181         '75.0.3769.1',
 182         '75.0.3769.0',
 183         '74.0.3729.97',
 184         '73.0.3683.119',
 185         '73.0.3683.118',
 186         '74.0.3729.96',
 187         '75.0.3768.5',
 188         '75.0.3768.4',
 189         '75.0.3768.3',
 190         '75.0.3768.2',
 191         '74.0.3729.95',
 192         '74.0.3729.94',
 193         '75.0.3768.1',
 194         '75.0.3768.0',
 195         '74.0.3729.93',
 196         '74.0.3729.92',
 197         '73.0.3683.117',
 198         '74.0.3729.91',
 199         '75.0.3766.3',
 200         '74.0.3729.90',
 201         '75.0.3767.2',
 202         '75.0.3767.1',
 203         '75.0.3767.0',
 204         '74.0.3729.89',
 205         '73.0.3683.116',
 206         '75.0.3766.2',
 207         '74.0.3729.88',
 208         '75.0.3766.1',
 209         '75.0.3766.0',
 210         '74.0.3729.87',
 211         '73.0.3683.115',
 212         '74.0.3729.86',
 213         '75.0.3765.1',
 214         '75.0.3765.0',
 215         '74.0.3729.85',
 216         '73.0.3683.114',
 217         '74.0.3729.84',
 218         '75.0.3764.1',
 219         '75.0.3764.0',
 220         '74.0.3729.83',
 221         '73.0.3683.113',
 222         '75.0.3763.2',
 223         '75.0.3761.4',
 224         '74.0.3729.82',
 225         '75.0.3763.1',
 226         '75.0.3763.0',
 227         '74.0.3729.81',
 228         '73.0.3683.112',
 229         '75.0.3762.1',
 230         '75.0.3762.0',
 231         '74.0.3729.80',
 232         '75.0.3761.3',
 233         '74.0.3729.79',
 234         '73.0.3683.111',
 235         '75.0.3761.2',
 236         '74.0.3729.78',
 237         '74.0.3729.77',
 238         '75.0.3761.1',
 239         '75.0.3761.0',
 240         '73.0.3683.110',
 241         '74.0.3729.76',
 242         '74.0.3729.75',
 243         '75.0.3760.0',
 244         '74.0.3729.74',
 245         '75.0.3759.8',
 246         '75.0.3759.7',
 247         '75.0.3759.6',
 248         '74.0.3729.73',
 249         '75.0.3759.5',
 250         '74.0.3729.72',
 251         '73.0.3683.109',
 252         '75.0.3759.4',
 253         '75.0.3759.3',
 254         '74.0.3729.71',
 255         '75.0.3759.2',
 256         '74.0.3729.70',
 257         '73.0.3683.108',
 258         '74.0.3729.69',
 259         '75.0.3759.1',
 260         '75.0.3759.0',
 261         '74.0.3729.68',
 262         '73.0.3683.107',
 263         '74.0.3729.67',
 264         '75.0.3758.1',
 265         '75.0.3758.0',
 266         '74.0.3729.66',
 267         '73.0.3683.106',
 268         '74.0.3729.65',
 269         '75.0.3757.1',
 270         '75.0.3757.0',
 271         '74.0.3729.64',
 272         '73.0.3683.105',
 273         '74.0.3729.63',
 274         '75.0.3756.1',
 275         '75.0.3756.0',
 276         '74.0.3729.62',
 277         '73.0.3683.104',
 278         '75.0.3755.3',
 279         '75.0.3755.2',
 280         '73.0.3683.103',
 281         '75.0.3755.1',
 282         '75.0.3755.0',
 283         '74.0.3729.61',
 284         '73.0.3683.102',
 285         '74.0.3729.60',
 286         '75.0.3754.2',
 287         '74.0.3729.59',
 288         '75.0.3753.4',
 289         '74.0.3729.58',
 290         '75.0.3754.1',
 291         '75.0.3754.0',
 292         '74.0.3729.57',
 293         '73.0.3683.101',
 294         '75.0.3753.3',
 295         '75.0.3752.2',
 296         '75.0.3753.2',
 297         '74.0.3729.56',
 298         '75.0.3753.1',
 299         '75.0.3753.0',
 300         '74.0.3729.55',
 301         '73.0.3683.100',
 302         '74.0.3729.54',
 303         '75.0.3752.1',
 304         '75.0.3752.0',
 305         '74.0.3729.53',
 306         '73.0.3683.99',
 307         '74.0.3729.52',
 308         '75.0.3751.1',
 309         '75.0.3751.0',
 310         '74.0.3729.51',
 311         '73.0.3683.98',
 312         '74.0.3729.50',
 313         '75.0.3750.0',
 314         '74.0.3729.49',
 315         '74.0.3729.48',
 316         '74.0.3729.47',
 317         '75.0.3749.3',
 318         '74.0.3729.46',
 319         '73.0.3683.97',
 320         '75.0.3749.2',
 321         '74.0.3729.45',
 322         '75.0.3749.1',
 323         '75.0.3749.0',
 324         '74.0.3729.44',
 325         '73.0.3683.96',
 326         '74.0.3729.43',
 327         '74.0.3729.42',
 328         '75.0.3748.1',
 329         '75.0.3748.0',
 330         '74.0.3729.41',
 331         '75.0.3747.1',
 332         '73.0.3683.95',
 333         '75.0.3746.4',
 334         '74.0.3729.40',
 335         '74.0.3729.39',
 336         '75.0.3747.0',
 337         '75.0.3746.3',
 338         '75.0.3746.2',
 339         '74.0.3729.38',
 340         '75.0.3746.1',
 341         '75.0.3746.0',
 342         '74.0.3729.37',
 343         '73.0.3683.94',
 344         '75.0.3745.5',
 345         '75.0.3745.4',
 346         '75.0.3745.3',
 347         '75.0.3745.2',
 348         '74.0.3729.36',
 349         '75.0.3745.1',
 350         '75.0.3745.0',
 351         '75.0.3744.2',
 352         '74.0.3729.35',
 353         '73.0.3683.93',
 354         '74.0.3729.34',
 355         '75.0.3744.1',
 356         '75.0.3744.0',
 357         '74.0.3729.33',
 358         '73.0.3683.92',
 359         '74.0.3729.32',
 360         '74.0.3729.31',
 361         '73.0.3683.91',
 362         '75.0.3741.2',
 363         '75.0.3740.5',
 364         '74.0.3729.30',
 365         '75.0.3741.1',
 366         '75.0.3741.0',
 367         '74.0.3729.29',
 368         '75.0.3740.4',
 369         '73.0.3683.90',
 370         '74.0.3729.28',
 371         '75.0.3740.3',
 372         '73.0.3683.89',
 373         '75.0.3740.2',
 374         '74.0.3729.27',
 375         '75.0.3740.1',
 376         '75.0.3740.0',
 377         '74.0.3729.26',
 378         '73.0.3683.88',
 379         '73.0.3683.87',
 380         '74.0.3729.25',
 381         '75.0.3739.1',
 382         '75.0.3739.0',
 383         '73.0.3683.86',
 384         '74.0.3729.24',
 385         '73.0.3683.85',
 386         '75.0.3738.4',
 387         '75.0.3738.3',
 388         '75.0.3738.2',
 389         '75.0.3738.1',
 390         '75.0.3738.0',
 391         '74.0.3729.23',
 392         '73.0.3683.84',
 393         '74.0.3729.22',
 394         '74.0.3729.21',
 395         '75.0.3737.1',
 396         '75.0.3737.0',
 397         '74.0.3729.20',
 398         '73.0.3683.83',
 399         '74.0.3729.19',
 400         '75.0.3736.1',
 401         '75.0.3736.0',
 402         '74.0.3729.18',
 403         '73.0.3683.82',
 404         '74.0.3729.17',
 405         '75.0.3735.1',
 406         '75.0.3735.0',
 407         '74.0.3729.16',
 408         '73.0.3683.81',
 409         '75.0.3734.1',
 410         '75.0.3734.0',
 411         '74.0.3729.15',
 412         '73.0.3683.80',
 413         '74.0.3729.14',
 414         '75.0.3733.1',
 415         '75.0.3733.0',
 416         '75.0.3732.1',
 417         '74.0.3729.13',
 418         '74.0.3729.12',
 419         '73.0.3683.79',
 420         '74.0.3729.11',
 421         '75.0.3732.0',
 422         '74.0.3729.10',
 423         '73.0.3683.78',
 424         '74.0.3729.9',
 425         '74.0.3729.8',
 426         '74.0.3729.7',
 427         '75.0.3731.3',
 428         '75.0.3731.2',
 429         '75.0.3731.0',
 430         '74.0.3729.6',
 431         '73.0.3683.77',
 432         '73.0.3683.76',
 433         '75.0.3730.5',
 434         '75.0.3730.4',
 435         '73.0.3683.75',
 436         '74.0.3729.5',
 437         '73.0.3683.74',
 438         '75.0.3730.3',
 439         '75.0.3730.2',
 440         '74.0.3729.4',
 441         '73.0.3683.73',
 442         '73.0.3683.72',
 443         '75.0.3730.1',
 444         '75.0.3730.0',
 445         '74.0.3729.3',
 446         '73.0.3683.71',
 447         '74.0.3729.2',
 448         '73.0.3683.70',
 449         '74.0.3729.1',
 450         '74.0.3729.0',
 451         '74.0.3726.4',
 452         '73.0.3683.69',
 453         '74.0.3726.3',
 454         '74.0.3728.0',
 455         '74.0.3726.2',
 456         '73.0.3683.68',
 457         '74.0.3726.1',
 458         '74.0.3726.0',
 459         '74.0.3725.4',
 460         '73.0.3683.67',
 461         '73.0.3683.66',
 462         '74.0.3725.3',
 463         '74.0.3725.2',
 464         '74.0.3725.1',
 465         '74.0.3724.8',
 466         '74.0.3725.0',
 467         '73.0.3683.65',
 468         '74.0.3724.7',
 469         '74.0.3724.6',
 470         '74.0.3724.5',
 471         '74.0.3724.4',
 472         '74.0.3724.3',
 473         '74.0.3724.2',
 474         '74.0.3724.1',
 475         '74.0.3724.0',
 476         '73.0.3683.64',
 477         '74.0.3723.1',
 478         '74.0.3723.0',
 479         '73.0.3683.63',
 480         '74.0.3722.1',
 481         '74.0.3722.0',
 482         '73.0.3683.62',
 483         '74.0.3718.9',
 484         '74.0.3702.3',
 485         '74.0.3721.3',
 486         '74.0.3721.2',
 487         '74.0.3721.1',
 488         '74.0.3721.0',
 489         '74.0.3720.6',
 490         '73.0.3683.61',
 491         '72.0.3626.122',
 492         '73.0.3683.60',
 493         '74.0.3720.5',
 494         '72.0.3626.121',
 495         '74.0.3718.8',
 496         '74.0.3720.4',
 497         '74.0.3720.3',
 498         '74.0.3718.7',
 499         '74.0.3720.2',
 500         '74.0.3720.1',
 501         '74.0.3720.0',
 502         '74.0.3718.6',
 503         '74.0.3719.5',
 504         '73.0.3683.59',
 505         '74.0.3718.5',
 506         '74.0.3718.4',
 507         '74.0.3719.4',
 508         '74.0.3719.3',
 509         '74.0.3719.2',
 510         '74.0.3719.1',
 511         '73.0.3683.58',
 512         '74.0.3719.0',
 513         '73.0.3683.57',
 514         '73.0.3683.56',
 515         '74.0.3718.3',
 516         '73.0.3683.55',
 517         '74.0.3718.2',
 518         '74.0.3718.1',
 519         '74.0.3718.0',
 520         '73.0.3683.54',
 521         '74.0.3717.2',
 522         '73.0.3683.53',
 523         '74.0.3717.1',
 524         '74.0.3717.0',
 525         '73.0.3683.52',
 526         '74.0.3716.1',
 527         '74.0.3716.0',
 528         '73.0.3683.51',
 529         '74.0.3715.1',
 530         '74.0.3715.0',
 531         '73.0.3683.50',
 532         '74.0.3711.2',
 533         '74.0.3714.2',
 534         '74.0.3713.3',
 535         '74.0.3714.1',
 536         '74.0.3714.0',
 537         '73.0.3683.49',
 538         '74.0.3713.1',
 539         '74.0.3713.0',
 540         '72.0.3626.120',
 541         '73.0.3683.48',
 542         '74.0.3712.2',
 543         '74.0.3712.1',
 544         '74.0.3712.0',
 545         '73.0.3683.47',
 546         '72.0.3626.119',
 547         '73.0.3683.46',
 548         '74.0.3710.2',
 549         '72.0.3626.118',
 550         '74.0.3711.1',
 551         '74.0.3711.0',
 552         '73.0.3683.45',
 553         '72.0.3626.117',
 554         '74.0.3710.1',
 555         '74.0.3710.0',
 556         '73.0.3683.44',
 557         '72.0.3626.116',
 558         '74.0.3709.1',
 559         '74.0.3709.0',
 560         '74.0.3704.9',
 561         '73.0.3683.43',
 562         '72.0.3626.115',
 563         '74.0.3704.8',
 564         '74.0.3704.7',
 565         '74.0.3708.0',
 566         '74.0.3706.7',
 567         '74.0.3704.6',
 568         '73.0.3683.42',
 569         '72.0.3626.114',
 570         '74.0.3706.6',
 571         '72.0.3626.113',
 572         '74.0.3704.5',
 573         '74.0.3706.5',
 574         '74.0.3706.4',
 575         '74.0.3706.3',
 576         '74.0.3706.2',
 577         '74.0.3706.1',
 578         '74.0.3706.0',
 579         '73.0.3683.41',
 580         '72.0.3626.112',
 581         '74.0.3705.1',
 582         '74.0.3705.0',
 583         '73.0.3683.40',
 584         '72.0.3626.111',
 585         '73.0.3683.39',
 586         '74.0.3704.4',
 587         '73.0.3683.38',
 588         '74.0.3704.3',
 589         '74.0.3704.2',
 590         '74.0.3704.1',
 591         '74.0.3704.0',
 592         '73.0.3683.37',
 593         '72.0.3626.110',
 594         '72.0.3626.109',
 595         '74.0.3703.3',
 596         '74.0.3703.2',
 597         '73.0.3683.36',
 598         '74.0.3703.1',
 599         '74.0.3703.0',
 600         '73.0.3683.35',
 601         '72.0.3626.108',
 602         '74.0.3702.2',
 603         '74.0.3699.3',
 604         '74.0.3702.1',
 605         '74.0.3702.0',
 606         '73.0.3683.34',
 607         '72.0.3626.107',
 608         '73.0.3683.33',
 609         '74.0.3701.1',
 610         '74.0.3701.0',
 611         '73.0.3683.32',
 612         '73.0.3683.31',
 613         '72.0.3626.105',
 614         '74.0.3700.1',
 615         '74.0.3700.0',
 616         '73.0.3683.29',
 617         '72.0.3626.103',
 618         '74.0.3699.2',
 619         '74.0.3699.1',
 620         '74.0.3699.0',
 621         '73.0.3683.28',
 622         '72.0.3626.102',
 623         '73.0.3683.27',
 624         '73.0.3683.26',
 625         '74.0.3698.0',
 626         '74.0.3696.2',
 627         '72.0.3626.101',
 628         '73.0.3683.25',
 629         '74.0.3696.1',
 630         '74.0.3696.0',
 631         '74.0.3694.8',
 632         '72.0.3626.100',
 633         '74.0.3694.7',
 634         '74.0.3694.6',
 635         '74.0.3694.5',
 636         '74.0.3694.4',
 637         '72.0.3626.99',
 638         '72.0.3626.98',
 639         '74.0.3694.3',
 640         '73.0.3683.24',
 641         '72.0.3626.97',
 642         '72.0.3626.96',
 643         '72.0.3626.95',
 644         '73.0.3683.23',
 645         '72.0.3626.94',
 646         '73.0.3683.22',
 647         '73.0.3683.21',
 648         '72.0.3626.93',
 649         '74.0.3694.2',
 650         '72.0.3626.92',
 651         '74.0.3694.1',
 652         '74.0.3694.0',
 653         '74.0.3693.6',
 654         '73.0.3683.20',
 655         '72.0.3626.91',
 656         '74.0.3693.5',
 657         '74.0.3693.4',
 658         '74.0.3693.3',
 659         '74.0.3693.2',
 660         '73.0.3683.19',
 661         '74.0.3693.1',
 662         '74.0.3693.0',
 663         '73.0.3683.18',
 664         '72.0.3626.90',
 665         '74.0.3692.1',
 666         '74.0.3692.0',
 667         '73.0.3683.17',
 668         '72.0.3626.89',
 669         '74.0.3687.3',
 670         '74.0.3691.1',
 671         '74.0.3691.0',
 672         '73.0.3683.16',
 673         '72.0.3626.88',
 674         '72.0.3626.87',
 675         '73.0.3683.15',
 676         '74.0.3690.1',
 677         '74.0.3690.0',
 678         '73.0.3683.14',
 679         '72.0.3626.86',
 680         '73.0.3683.13',
 681         '73.0.3683.12',
 682         '74.0.3689.1',
 683         '74.0.3689.0',
 684         '73.0.3683.11',
 685         '72.0.3626.85',
 686         '73.0.3683.10',
 687         '72.0.3626.84',
 688         '73.0.3683.9',
 689         '74.0.3688.1',
 690         '74.0.3688.0',
 691         '73.0.3683.8',
 692         '72.0.3626.83',
 693         '74.0.3687.2',
 694         '74.0.3687.1',
 695         '74.0.3687.0',
 696         '73.0.3683.7',
 697         '72.0.3626.82',
 698         '74.0.3686.4',
 699         '72.0.3626.81',
 700         '74.0.3686.3',
 701         '74.0.3686.2',
 702         '74.0.3686.1',
 703         '74.0.3686.0',
 704         '73.0.3683.6',
 705         '72.0.3626.80',
 706         '74.0.3685.1',
 707         '74.0.3685.0',
 708         '73.0.3683.5',
 709         '72.0.3626.79',
 710         '74.0.3684.1',
 711         '74.0.3684.0',
 712         '73.0.3683.4',
 713         '72.0.3626.78',
 714         '72.0.3626.77',
 715         '73.0.3683.3',
 716         '73.0.3683.2',
 717         '72.0.3626.76',
 718         '73.0.3683.1',
 719         '73.0.3683.0',
 720         '72.0.3626.75',
 721         '71.0.3578.141',
 722         '73.0.3682.1',
 723         '73.0.3682.0',
 724         '72.0.3626.74',
 725         '71.0.3578.140',
 726         '73.0.3681.4',
 727         '73.0.3681.3',
 728         '73.0.3681.2',
 729         '73.0.3681.1',
 730         '73.0.3681.0',
 731         '72.0.3626.73',
 732         '71.0.3578.139',
 733         '72.0.3626.72',
 734         '72.0.3626.71',
 735         '73.0.3680.1',
 736         '73.0.3680.0',
 737         '72.0.3626.70',
 738         '71.0.3578.138',
 739         '73.0.3678.2',
 740         '73.0.3679.1',
 741         '73.0.3679.0',
 742         '72.0.3626.69',
 743         '71.0.3578.137',
 744         '73.0.3678.1',
 745         '73.0.3678.0',
 746         '71.0.3578.136',
 747         '73.0.3677.1',
 748         '73.0.3677.0',
 749         '72.0.3626.68',
 750         '72.0.3626.67',
 751         '71.0.3578.135',
 752         '73.0.3676.1',
 753         '73.0.3676.0',
 754         '73.0.3674.2',
 755         '72.0.3626.66',
 756         '71.0.3578.134',
 757         '73.0.3674.1',
 758         '73.0.3674.0',
 759         '72.0.3626.65',
 760         '71.0.3578.133',
 761         '73.0.3673.2',
 762         '73.0.3673.1',
 763         '73.0.3673.0',
 764         '72.0.3626.64',
 765         '71.0.3578.132',
 766         '72.0.3626.63',
 767         '72.0.3626.62',
 768         '72.0.3626.61',
 769         '72.0.3626.60',
 770         '73.0.3672.1',
 771         '73.0.3672.0',
 772         '72.0.3626.59',
 773         '71.0.3578.131',
 774         '73.0.3671.3',
 775         '73.0.3671.2',
 776         '73.0.3671.1',
 777         '73.0.3671.0',
 778         '72.0.3626.58',
 779         '71.0.3578.130',
 780         '73.0.3670.1',
 781         '73.0.3670.0',
 782         '72.0.3626.57',
 783         '71.0.3578.129',
 784         '73.0.3669.1',
 785         '73.0.3669.0',
 786         '72.0.3626.56',
 787         '71.0.3578.128',
 788         '73.0.3668.2',
 789         '73.0.3668.1',
 790         '73.0.3668.0',
 791         '72.0.3626.55',
 792         '71.0.3578.127',
 793         '73.0.3667.2',
 794         '73.0.3667.1',
 795         '73.0.3667.0',
 796         '72.0.3626.54',
 797         '71.0.3578.126',
 798         '73.0.3666.1',
 799         '73.0.3666.0',
 800         '72.0.3626.53',
 801         '71.0.3578.125',
 802         '73.0.3665.4',
 803         '73.0.3665.3',
 804         '72.0.3626.52',
 805         '73.0.3665.2',
 806         '73.0.3664.4',
 807         '73.0.3665.1',
 808         '73.0.3665.0',
 809         '72.0.3626.51',
 810         '71.0.3578.124',
 811         '72.0.3626.50',
 812         '73.0.3664.3',
 813         '73.0.3664.2',
 814         '73.0.3664.1',
 815         '73.0.3664.0',
 816         '73.0.3663.2',
 817         '72.0.3626.49',
 818         '71.0.3578.123',
 819         '73.0.3663.1',
 820         '73.0.3663.0',
 821         '72.0.3626.48',
 822         '71.0.3578.122',
 823         '73.0.3662.1',
 824         '73.0.3662.0',
 825         '72.0.3626.47',
 826         '71.0.3578.121',
 827         '73.0.3661.1',
 828         '72.0.3626.46',
 829         '73.0.3661.0',
 830         '72.0.3626.45',
 831         '71.0.3578.120',
 832         '73.0.3660.2',
 833         '73.0.3660.1',
 834         '73.0.3660.0',
 835         '72.0.3626.44',
 836         '71.0.3578.119',
 837         '73.0.3659.1',
 838         '73.0.3659.0',
 839         '72.0.3626.43',
 840         '71.0.3578.118',
 841         '73.0.3658.1',
 842         '73.0.3658.0',
 843         '72.0.3626.42',
 844         '71.0.3578.117',
 845         '73.0.3657.1',
 846         '73.0.3657.0',
 847         '72.0.3626.41',
 848         '71.0.3578.116',
 849         '73.0.3656.1',
 850         '73.0.3656.0',
 851         '72.0.3626.40',
 852         '71.0.3578.115',
 853         '73.0.3655.1',
 854         '73.0.3655.0',
 855         '72.0.3626.39',
 856         '71.0.3578.114',
 857         '73.0.3654.1',
 858         '73.0.3654.0',
 859         '72.0.3626.38',
 860         '71.0.3578.113',
 861         '73.0.3653.1',
 862         '73.0.3653.0',
 863         '72.0.3626.37',
 864         '71.0.3578.112',
 865         '73.0.3652.1',
 866         '73.0.3652.0',
 867         '72.0.3626.36',
 868         '71.0.3578.111',
 869         '73.0.3651.1',
 870         '73.0.3651.0',
 871         '72.0.3626.35',
 872         '71.0.3578.110',
 873         '73.0.3650.1',
 874         '73.0.3650.0',
 875         '72.0.3626.34',
 876         '71.0.3578.109',
 877         '73.0.3649.1',
 878         '73.0.3649.0',
 879         '72.0.3626.33',
 880         '71.0.3578.108',
 881         '73.0.3648.2',
 882         '73.0.3648.1',
 883         '73.0.3648.0',
 884         '72.0.3626.32',
 885         '71.0.3578.107',
 886         '73.0.3647.2',
 887         '73.0.3647.1',
 888         '73.0.3647.0',
 889         '72.0.3626.31',
 890         '71.0.3578.106',
 891         '73.0.3635.3',
 892         '73.0.3646.2',
 893         '73.0.3646.1',
 894         '73.0.3646.0',
 895         '72.0.3626.30',
 896         '71.0.3578.105',
 897         '72.0.3626.29',
 898         '73.0.3645.2',
 899         '73.0.3645.1',
 900         '73.0.3645.0',
 901         '72.0.3626.28',
 902         '71.0.3578.104',
 903         '72.0.3626.27',
 904         '72.0.3626.26',
 905         '72.0.3626.25',
 906         '72.0.3626.24',
 907         '73.0.3644.0',
 908         '73.0.3643.2',
 909         '72.0.3626.23',
 910         '71.0.3578.103',
 911         '73.0.3643.1',
 912         '73.0.3643.0',
 913         '72.0.3626.22',
 914         '71.0.3578.102',
 915         '73.0.3642.1',
 916         '73.0.3642.0',
 917         '72.0.3626.21',
 918         '71.0.3578.101',
 919         '73.0.3641.1',
 920         '73.0.3641.0',
 921         '72.0.3626.20',
 922         '71.0.3578.100',
 923         '72.0.3626.19',
 924         '73.0.3640.1',
 925         '73.0.3640.0',
 926         '72.0.3626.18',
 927         '73.0.3639.1',
 928         '71.0.3578.99',
 929         '73.0.3639.0',
 930         '72.0.3626.17',
 931         '73.0.3638.2',
 932         '72.0.3626.16',
 933         '73.0.3638.1',
 934         '73.0.3638.0',
 935         '72.0.3626.15',
 936         '71.0.3578.98',
 937         '73.0.3635.2',
 938         '71.0.3578.97',
 939         '73.0.3637.1',
 940         '73.0.3637.0',
 941         '72.0.3626.14',
 942         '71.0.3578.96',
 943         '71.0.3578.95',
 944         '72.0.3626.13',
 945         '71.0.3578.94',
 946         '73.0.3636.2',
 947         '71.0.3578.93',
 948         '73.0.3636.1',
 949         '73.0.3636.0',
 950         '72.0.3626.12',
 951         '71.0.3578.92',
 952         '73.0.3635.1',
 953         '73.0.3635.0',
 954         '72.0.3626.11',
 955         '71.0.3578.91',
 956         '73.0.3634.2',
 957         '73.0.3634.1',
 958         '73.0.3634.0',
 959         '72.0.3626.10',
 960         '71.0.3578.90',
 961         '71.0.3578.89',
 962         '73.0.3633.2',
 963         '73.0.3633.1',
 964         '73.0.3633.0',
 965         '72.0.3610.4',
 966         '72.0.3626.9',
 967         '71.0.3578.88',
 968         '73.0.3632.5',
 969         '73.0.3632.4',
 970         '73.0.3632.3',
 971         '73.0.3632.2',
 972         '73.0.3632.1',
 973         '73.0.3632.0',
 974         '72.0.3626.8',
 975         '71.0.3578.87',
 976         '73.0.3631.2',
 977         '73.0.3631.1',
 978         '73.0.3631.0',
 979         '72.0.3626.7',
 980         '71.0.3578.86',
 981         '72.0.3626.6',
 982         '73.0.3630.1',
 983         '73.0.3630.0',
 984         '72.0.3626.5',
 985         '71.0.3578.85',
 986         '72.0.3626.4',
 987         '73.0.3628.3',
 988         '73.0.3628.2',
 989         '73.0.3629.1',
 990         '73.0.3629.0',
 991         '72.0.3626.3',
 992         '71.0.3578.84',
 993         '73.0.3628.1',
 994         '73.0.3628.0',
 995         '71.0.3578.83',
 996         '73.0.3627.1',
 997         '73.0.3627.0',
 998         '72.0.3626.2',
 999         '71.0.3578.82',
1000         '71.0.3578.81',
1001         '71.0.3578.80',
1002         '72.0.3626.1',
1003         '72.0.3626.0',
1004         '71.0.3578.79',
1005         '70.0.3538.124',
1006         '71.0.3578.78',
1007         '72.0.3623.4',
1008         '72.0.3625.2',
1009         '72.0.3625.1',
1010         '72.0.3625.0',
1011         '71.0.3578.77',
1012         '70.0.3538.123',
1013         '72.0.3624.4',
1014         '72.0.3624.3',
1015         '72.0.3624.2',
1016         '71.0.3578.76',
1017         '72.0.3624.1',
1018         '72.0.3624.0',
1019         '72.0.3623.3',
1020         '71.0.3578.75',
1021         '70.0.3538.122',
1022         '71.0.3578.74',
1023         '72.0.3623.2',
1024         '72.0.3610.3',
1025         '72.0.3623.1',
1026         '72.0.3623.0',
1027         '72.0.3622.3',
1028         '72.0.3622.2',
1029         '71.0.3578.73',
1030         '70.0.3538.121',
1031         '72.0.3622.1',
1032         '72.0.3622.0',
1033         '71.0.3578.72',
1034         '70.0.3538.120',
1035         '72.0.3621.1',
1036         '72.0.3621.0',
1037         '71.0.3578.71',
1038         '70.0.3538.119',
1039         '72.0.3620.1',
1040         '72.0.3620.0',
1041         '71.0.3578.70',
1042         '70.0.3538.118',
1043         '71.0.3578.69',
1044         '72.0.3619.1',
1045         '72.0.3619.0',
1046         '71.0.3578.68',
1047         '70.0.3538.117',
1048         '71.0.3578.67',
1049         '72.0.3618.1',
1050         '72.0.3618.0',
1051         '71.0.3578.66',
1052         '70.0.3538.116',
1053         '72.0.3617.1',
1054         '72.0.3617.0',
1055         '71.0.3578.65',
1056         '70.0.3538.115',
1057         '72.0.3602.3',
1058         '71.0.3578.64',
1059         '72.0.3616.1',
1060         '72.0.3616.0',
1061         '71.0.3578.63',
1062         '70.0.3538.114',
1063         '71.0.3578.62',
1064         '72.0.3615.1',
1065         '72.0.3615.0',
1066         '71.0.3578.61',
1067         '70.0.3538.113',
1068         '72.0.3614.1',
1069         '72.0.3614.0',
1070         '71.0.3578.60',
1071         '70.0.3538.112',
1072         '72.0.3613.1',
1073         '72.0.3613.0',
1074         '71.0.3578.59',
1075         '70.0.3538.111',
1076         '72.0.3612.2',
1077         '72.0.3612.1',
1078         '72.0.3612.0',
1079         '70.0.3538.110',
1080         '71.0.3578.58',
1081         '70.0.3538.109',
1082         '72.0.3611.2',
1083         '72.0.3611.1',
1084         '72.0.3611.0',
1085         '71.0.3578.57',
1086         '70.0.3538.108',
1087         '72.0.3610.2',
1088         '71.0.3578.56',
1089         '71.0.3578.55',
1090         '72.0.3610.1',
1091         '72.0.3610.0',
1092         '71.0.3578.54',
1093         '70.0.3538.107',
1094         '71.0.3578.53',
1095         '72.0.3609.3',
1096         '71.0.3578.52',
1097         '72.0.3609.2',
1098         '71.0.3578.51',
1099         '72.0.3608.5',
1100         '72.0.3609.1',
1101         '72.0.3609.0',
1102         '71.0.3578.50',
1103         '70.0.3538.106',
1104         '72.0.3608.4',
1105         '72.0.3608.3',
1106         '72.0.3608.2',
1107         '71.0.3578.49',
1108         '72.0.3608.1',
1109         '72.0.3608.0',
1110         '70.0.3538.105',
1111         '71.0.3578.48',
1112         '72.0.3607.1',
1113         '72.0.3607.0',
1114         '71.0.3578.47',
1115         '70.0.3538.104',
1116         '72.0.3606.2',
1117         '72.0.3606.1',
1118         '72.0.3606.0',
1119         '71.0.3578.46',
1120         '70.0.3538.103',
1121         '70.0.3538.102',
1122         '72.0.3605.3',
1123         '72.0.3605.2',
1124         '72.0.3605.1',
1125         '72.0.3605.0',
1126         '71.0.3578.45',
1127         '70.0.3538.101',
1128         '71.0.3578.44',
1129         '71.0.3578.43',
1130         '70.0.3538.100',
1131         '70.0.3538.99',
1132         '71.0.3578.42',
1133         '72.0.3604.1',
1134         '72.0.3604.0',
1135         '71.0.3578.41',
1136         '70.0.3538.98',
1137         '71.0.3578.40',
1138         '72.0.3603.2',
1139         '72.0.3603.1',
1140         '72.0.3603.0',
1141         '71.0.3578.39',
1142         '70.0.3538.97',
1143         '72.0.3602.2',
1144         '71.0.3578.38',
1145         '71.0.3578.37',
1146         '72.0.3602.1',
1147         '72.0.3602.0',
1148         '71.0.3578.36',
1149         '70.0.3538.96',
1150         '72.0.3601.1',
1151         '72.0.3601.0',
1152         '71.0.3578.35',
1153         '70.0.3538.95',
1154         '72.0.3600.1',
1155         '72.0.3600.0',
1156         '71.0.3578.34',
1157         '70.0.3538.94',
1158         '72.0.3599.3',
1159         '72.0.3599.2',
1160         '72.0.3599.1',
1161         '72.0.3599.0',
1162         '71.0.3578.33',
1163         '70.0.3538.93',
1164         '72.0.3598.1',
1165         '72.0.3598.0',
1166         '71.0.3578.32',
1167         '70.0.3538.87',
1168         '72.0.3597.1',
1169         '72.0.3597.0',
1170         '72.0.3596.2',
1171         '71.0.3578.31',
1172         '70.0.3538.86',
1173         '71.0.3578.30',
1174         '71.0.3578.29',
1175         '72.0.3596.1',
1176         '72.0.3596.0',
1177         '71.0.3578.28',
1178         '70.0.3538.85',
1179         '72.0.3595.2',
1180         '72.0.3591.3',
1181         '72.0.3595.1',
1182         '72.0.3595.0',
1183         '71.0.3578.27',
1184         '70.0.3538.84',
1185         '72.0.3594.1',
1186         '72.0.3594.0',
1187         '71.0.3578.26',
1188         '70.0.3538.83',
1189         '72.0.3593.2',
1190         '72.0.3593.1',
1191         '72.0.3593.0',
1192         '71.0.3578.25',
1193         '70.0.3538.82',
1194         '72.0.3589.3',
1195         '72.0.3592.2',
1196         '72.0.3592.1',
1197         '72.0.3592.0',
1198         '71.0.3578.24',
1199         '72.0.3589.2',
1200         '70.0.3538.81',
1201         '70.0.3538.80',
1202         '72.0.3591.2',
1203         '72.0.3591.1',
1204         '72.0.3591.0',
1205         '71.0.3578.23',
1206         '70.0.3538.79',
1207         '71.0.3578.22',
1208         '72.0.3590.1',
1209         '72.0.3590.0',
1210         '71.0.3578.21',
1211         '70.0.3538.78',
1212         '70.0.3538.77',
1213         '72.0.3589.1',
1214         '72.0.3589.0',
1215         '71.0.3578.20',
1216         '70.0.3538.76',
1217         '71.0.3578.19',
1218         '70.0.3538.75',
1219         '72.0.3588.1',
1220         '72.0.3588.0',
1221         '71.0.3578.18',
1222         '70.0.3538.74',
1223         '72.0.3586.2',
1224         '72.0.3587.0',
1225         '71.0.3578.17',
1226         '70.0.3538.73',
1227         '72.0.3586.1',
1228         '72.0.3586.0',
1229         '71.0.3578.16',
1230         '70.0.3538.72',
1231         '72.0.3585.1',
1232         '72.0.3585.0',
1233         '71.0.3578.15',
1234         '70.0.3538.71',
1235         '71.0.3578.14',
1236         '72.0.3584.1',
1237         '72.0.3584.0',
1238         '71.0.3578.13',
1239         '70.0.3538.70',
1240         '72.0.3583.2',
1241         '71.0.3578.12',
1242         '72.0.3583.1',
1243         '72.0.3583.0',
1244         '71.0.3578.11',
1245         '70.0.3538.69',
1246         '71.0.3578.10',
1247         '72.0.3582.0',
1248         '72.0.3581.4',
1249         '71.0.3578.9',
1250         '70.0.3538.67',
1251         '72.0.3581.3',
1252         '72.0.3581.2',
1253         '72.0.3581.1',
1254         '72.0.3581.0',
1255         '71.0.3578.8',
1256         '70.0.3538.66',
1257         '72.0.3580.1',
1258         '72.0.3580.0',
1259         '71.0.3578.7',
1260         '70.0.3538.65',
1261         '71.0.3578.6',
1262         '72.0.3579.1',
1263         '72.0.3579.0',
1264         '71.0.3578.5',
1265         '70.0.3538.64',
1266         '71.0.3578.4',
1267         '71.0.3578.3',
1268         '71.0.3578.2',
1269         '71.0.3578.1',
1270         '71.0.3578.0',
1271         '70.0.3538.63',
1272         '69.0.3497.128',
1273         '70.0.3538.62',
1274         '70.0.3538.61',
1275         '70.0.3538.60',
1276         '70.0.3538.59',
1277         '71.0.3577.1',
1278         '71.0.3577.0',
1279         '70.0.3538.58',
1280         '69.0.3497.127',
1281         '71.0.3576.2',
1282         '71.0.3576.1',
1283         '71.0.3576.0',
1284         '70.0.3538.57',
1285         '70.0.3538.56',
1286         '71.0.3575.2',
1287         '70.0.3538.55',
1288         '69.0.3497.126',
1289         '70.0.3538.54',
1290         '71.0.3575.1',
1291         '71.0.3575.0',
1292         '71.0.3574.1',
1293         '71.0.3574.0',
1294         '70.0.3538.53',
1295         '69.0.3497.125',
1296         '70.0.3538.52',
1297         '71.0.3573.1',
1298         '71.0.3573.0',
1299         '70.0.3538.51',
1300         '69.0.3497.124',
1301         '71.0.3572.1',
1302         '71.0.3572.0',
1303         '70.0.3538.50',
1304         '69.0.3497.123',
1305         '71.0.3571.2',
1306         '70.0.3538.49',
1307         '69.0.3497.122',
1308         '71.0.3571.1',
1309         '71.0.3571.0',
1310         '70.0.3538.48',
1311         '69.0.3497.121',
1312         '71.0.3570.1',
1313         '71.0.3570.0',
1314         '70.0.3538.47',
1315         '69.0.3497.120',
1316         '71.0.3568.2',
1317         '71.0.3569.1',
1318         '71.0.3569.0',
1319         '70.0.3538.46',
1320         '69.0.3497.119',
1321         '70.0.3538.45',
1322         '71.0.3568.1',
1323         '71.0.3568.0',
1324         '70.0.3538.44',
1325         '69.0.3497.118',
1326         '70.0.3538.43',
1327         '70.0.3538.42',
1328         '71.0.3567.1',
1329         '71.0.3567.0',
1330         '70.0.3538.41',
1331         '69.0.3497.117',
1332         '71.0.3566.1',
1333         '71.0.3566.0',
1334         '70.0.3538.40',
1335         '69.0.3497.116',
1336         '71.0.3565.1',
1337         '71.0.3565.0',
1338         '70.0.3538.39',
1339         '69.0.3497.115',
1340         '71.0.3564.1',
1341         '71.0.3564.0',
1342         '70.0.3538.38',
1343         '69.0.3497.114',
1344         '71.0.3563.0',
1345         '71.0.3562.2',
1346         '70.0.3538.37',
1347         '69.0.3497.113',
1348         '70.0.3538.36',
1349         '70.0.3538.35',
1350         '71.0.3562.1',
1351         '71.0.3562.0',
1352         '70.0.3538.34',
1353         '69.0.3497.112',
1354         '70.0.3538.33',
1355         '71.0.3561.1',
1356         '71.0.3561.0',
1357         '70.0.3538.32',
1358         '69.0.3497.111',
1359         '71.0.3559.6',
1360         '71.0.3560.1',
1361         '71.0.3560.0',
1362         '71.0.3559.5',
1363         '71.0.3559.4',
1364         '70.0.3538.31',
1365         '69.0.3497.110',
1366         '71.0.3559.3',
1367         '70.0.3538.30',
1368         '69.0.3497.109',
1369         '71.0.3559.2',
1370         '71.0.3559.1',
1371         '71.0.3559.0',
1372         '70.0.3538.29',
1373         '69.0.3497.108',
1374         '71.0.3558.2',
1375         '71.0.3558.1',
1376         '71.0.3558.0',
1377         '70.0.3538.28',
1378         '69.0.3497.107',
1379         '71.0.3557.2',
1380         '71.0.3557.1',
1381         '71.0.3557.0',
1382         '70.0.3538.27',
1383         '69.0.3497.106',
1384         '71.0.3554.4',
1385         '70.0.3538.26',
1386         '71.0.3556.1',
1387         '71.0.3556.0',
1388         '70.0.3538.25',
1389         '71.0.3554.3',
1390         '69.0.3497.105',
1391         '71.0.3554.2',
1392         '70.0.3538.24',
1393         '69.0.3497.104',
1394         '71.0.3555.2',
1395         '70.0.3538.23',
1396         '71.0.3555.1',
1397         '71.0.3555.0',
1398         '70.0.3538.22',
1399         '69.0.3497.103',
1400         '71.0.3554.1',
1401         '71.0.3554.0',
1402         '70.0.3538.21',
1403         '69.0.3497.102',
1404         '71.0.3553.3',
1405         '70.0.3538.20',
1406         '69.0.3497.101',
1407         '71.0.3553.2',
1408         '69.0.3497.100',
1409         '71.0.3553.1',
1410         '71.0.3553.0',
1411         '70.0.3538.19',
1412         '69.0.3497.99',
1413         '69.0.3497.98',
1414         '69.0.3497.97',
1415         '71.0.3552.6',
1416         '71.0.3552.5',
1417         '71.0.3552.4',
1418         '71.0.3552.3',
1419         '71.0.3552.2',
1420         '71.0.3552.1',
1421         '71.0.3552.0',
1422         '70.0.3538.18',
1423         '69.0.3497.96',
1424         '71.0.3551.3',
1425         '71.0.3551.2',
1426         '71.0.3551.1',
1427         '71.0.3551.0',
1428         '70.0.3538.17',
1429         '69.0.3497.95',
1430         '71.0.3550.3',
1431         '71.0.3550.2',
1432         '71.0.3550.1',
1433         '71.0.3550.0',
1434         '70.0.3538.16',
1435         '69.0.3497.94',
1436         '71.0.3549.1',
1437         '71.0.3549.0',
1438         '70.0.3538.15',
1439         '69.0.3497.93',
1440         '69.0.3497.92',
1441         '71.0.3548.1',
1442         '71.0.3548.0',
1443         '70.0.3538.14',
1444         '69.0.3497.91',
1445         '71.0.3547.1',
1446         '71.0.3547.0',
1447         '70.0.3538.13',
1448         '69.0.3497.90',
1449         '71.0.3546.2',
1450         '69.0.3497.89',
1451         '71.0.3546.1',
1452         '71.0.3546.0',
1453         '70.0.3538.12',
1454         '69.0.3497.88',
1455         '71.0.3545.4',
1456         '71.0.3545.3',
1457         '71.0.3545.2',
1458         '71.0.3545.1',
1459         '71.0.3545.0',
1460         '70.0.3538.11',
1461         '69.0.3497.87',
1462         '71.0.3544.5',
1463         '71.0.3544.4',
1464         '71.0.3544.3',
1465         '71.0.3544.2',
1466         '71.0.3544.1',
1467         '71.0.3544.0',
1468         '69.0.3497.86',
1469         '70.0.3538.10',
1470         '69.0.3497.85',
1471         '70.0.3538.9',
1472         '69.0.3497.84',
1473         '71.0.3543.4',
1474         '70.0.3538.8',
1475         '71.0.3543.3',
1476         '71.0.3543.2',
1477         '71.0.3543.1',
1478         '71.0.3543.0',
1479         '70.0.3538.7',
1480         '69.0.3497.83',
1481         '71.0.3542.2',
1482         '71.0.3542.1',
1483         '71.0.3542.0',
1484         '70.0.3538.6',
1485         '69.0.3497.82',
1486         '69.0.3497.81',
1487         '71.0.3541.1',
1488         '71.0.3541.0',
1489         '70.0.3538.5',
1490         '69.0.3497.80',
1491         '71.0.3540.1',
1492         '71.0.3540.0',
1493         '70.0.3538.4',
1494         '69.0.3497.79',
1495         '70.0.3538.3',
1496         '71.0.3539.1',
1497         '71.0.3539.0',
1498         '69.0.3497.78',
1499         '68.0.3440.134',
1500         '69.0.3497.77',
1501         '70.0.3538.2',
1502         '70.0.3538.1',
1503         '70.0.3538.0',
1504         '69.0.3497.76',
1505         '68.0.3440.133',
1506         '69.0.3497.75',
1507         '70.0.3537.2',
1508         '70.0.3537.1',
1509         '70.0.3537.0',
1510         '69.0.3497.74',
1511         '68.0.3440.132',
1512         '70.0.3536.0',
1513         '70.0.3535.5',
1514         '70.0.3535.4',
1515         '70.0.3535.3',
1516         '69.0.3497.73',
1517         '68.0.3440.131',
1518         '70.0.3532.8',
1519         '70.0.3532.7',
1520         '69.0.3497.72',
1521         '69.0.3497.71',
1522         '70.0.3535.2',
1523         '70.0.3535.1',
1524         '70.0.3535.0',
1525         '69.0.3497.70',
1526         '68.0.3440.130',
1527         '69.0.3497.69',
1528         '68.0.3440.129',
1529         '70.0.3534.4',
1530         '70.0.3534.3',
1531         '70.0.3534.2',
1532         '70.0.3534.1',
1533         '70.0.3534.0',
1534         '69.0.3497.68',
1535         '68.0.3440.128',
1536         '70.0.3533.2',
1537         '70.0.3533.1',
1538         '70.0.3533.0',
1539         '69.0.3497.67',
1540         '68.0.3440.127',
1541         '70.0.3532.6',
1542         '70.0.3532.5',
1543         '70.0.3532.4',
1544         '69.0.3497.66',
1545         '68.0.3440.126',
1546         '70.0.3532.3',
1547         '70.0.3532.2',
1548         '70.0.3532.1',
1549         '69.0.3497.60',
1550         '69.0.3497.65',
1551         '69.0.3497.64',
1552         '70.0.3532.0',
1553         '70.0.3531.0',
1554         '70.0.3530.4',
1555         '70.0.3530.3',
1556         '70.0.3530.2',
1557         '69.0.3497.58',
1558         '68.0.3440.125',
1559         '69.0.3497.57',
1560         '69.0.3497.56',
1561         '69.0.3497.55',
1562         '69.0.3497.54',
1563         '70.0.3530.1',
1564         '70.0.3530.0',
1565         '69.0.3497.53',
1566         '68.0.3440.124',
1567         '69.0.3497.52',
1568         '70.0.3529.3',
1569         '70.0.3529.2',
1570         '70.0.3529.1',
1571         '70.0.3529.0',
1572         '69.0.3497.51',
1573         '70.0.3528.4',
1574         '68.0.3440.123',
1575         '70.0.3528.3',
1576         '70.0.3528.2',
1577         '70.0.3528.1',
1578         '70.0.3528.0',
1579         '69.0.3497.50',
1580         '68.0.3440.122',
1581         '70.0.3527.1',
1582         '70.0.3527.0',
1583         '69.0.3497.49',
1584         '68.0.3440.121',
1585         '70.0.3526.1',
1586         '70.0.3526.0',
1587         '68.0.3440.120',
1588         '69.0.3497.48',
1589         '69.0.3497.47',
1590         '68.0.3440.119',
1591         '68.0.3440.118',
1592         '70.0.3525.5',
1593         '70.0.3525.4',
1594         '70.0.3525.3',
1595         '68.0.3440.117',
1596         '69.0.3497.46',
1597         '70.0.3525.2',
1598         '70.0.3525.1',
1599         '70.0.3525.0',
1600         '69.0.3497.45',
1601         '68.0.3440.116',
1602         '70.0.3524.4',
1603         '70.0.3524.3',
1604         '69.0.3497.44',
1605         '70.0.3524.2',
1606         '70.0.3524.1',
1607         '70.0.3524.0',
1608         '70.0.3523.2',
1609         '69.0.3497.43',
1610         '68.0.3440.115',
1611         '70.0.3505.9',
1612         '69.0.3497.42',
1613         '70.0.3505.8',
1614         '70.0.3523.1',
1615         '70.0.3523.0',
1616         '69.0.3497.41',
1617         '68.0.3440.114',
1618         '70.0.3505.7',
1619         '69.0.3497.40',
1620         '70.0.3522.1',
1621         '70.0.3522.0',
1622         '70.0.3521.2',
1623         '69.0.3497.39',
1624         '68.0.3440.113',
1625         '70.0.3505.6',
1626         '70.0.3521.1',
1627         '70.0.3521.0',
1628         '69.0.3497.38',
1629         '68.0.3440.112',
1630         '70.0.3520.1',
1631         '70.0.3520.0',
1632         '69.0.3497.37',
1633         '68.0.3440.111',
1634         '70.0.3519.3',
1635         '70.0.3519.2',
1636         '70.0.3519.1',
1637         '70.0.3519.0',
1638         '69.0.3497.36',
1639         '68.0.3440.110',
1640         '70.0.3518.1',
1641         '70.0.3518.0',
1642         '69.0.3497.35',
1643         '69.0.3497.34',
1644         '68.0.3440.109',
1645         '70.0.3517.1',
1646         '70.0.3517.0',
1647         '69.0.3497.33',
1648         '68.0.3440.108',
1649         '69.0.3497.32',
1650         '70.0.3516.3',
1651         '70.0.3516.2',
1652         '70.0.3516.1',
1653         '70.0.3516.0',
1654         '69.0.3497.31',
1655         '68.0.3440.107',
1656         '70.0.3515.4',
1657         '68.0.3440.106',
1658         '70.0.3515.3',
1659         '70.0.3515.2',
1660         '70.0.3515.1',
1661         '70.0.3515.0',
1662         '69.0.3497.30',
1663         '68.0.3440.105',
1664         '68.0.3440.104',
1665         '70.0.3514.2',
1666         '70.0.3514.1',
1667         '70.0.3514.0',
1668         '69.0.3497.29',
1669         '68.0.3440.103',
1670         '70.0.3513.1',
1671         '70.0.3513.0',
1672         '69.0.3497.28',
1673     )
1674     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# HTTP request headers keyed by header name. The User-Agent value comes from
# one random_user_agent() call that runs when this dict is built at import
# time, so it is not re-drawn on later accesses.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1684
1685
# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1689
1690
# Unique sentinel meaning "no default was supplied". It is compared by
# identity, which lets callers pass None as a real default value.
NO_DEFAULT = object()
1692
# English month names in calendar order (index 0 is January)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, each list in calendar order. The 'en' entry
# is the very same list object as ENGLISH_MONTH_NAMES, not a copy.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1703
# File extensions, written without the leading dot.
# NOTE(review): looks like the set of media and manifest extensions the
# program recognises - confirm against the callers of this tuple.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1718
# needed for sanitizing filenames in restricted mode
# Maps each accented character of the first string to its ASCII replacement.
# The replacements are paired up positionally by zip(): plain strings passed
# to chain() contribute one replacement per character, while multi-letter
# replacements ('AE', 'OE', 'TH', 'ss', ...) are wrapped in lists so that
# chain() yields each of them as a single item.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1723
# strptime-style date formats that read the same regardless of whether a
# numeric date puts the day or the month first. The order of the entries is
# significant for anything that tries them one after another.
DATE_FORMATS = (
    # day number followed by month name
    '%d %B %Y', '%d %b %Y',
    # full month name, day with or without an ordinal suffix
    '%B %d %Y', '%B %dst %Y', '%B %dnd %Y', '%B %drd %Y', '%B %dth %Y',
    # abbreviated month name, day with or without an ordinal suffix
    '%b %d %Y', '%b %dst %Y', '%b %dnd %Y', '%b %drd %Y', '%b %dth %Y',
    # abbreviated month name, ordinal day, then hour (%I) and minute
    '%b %dst %Y %I:%M', '%b %dnd %Y %I:%M', '%b %drd %Y %I:%M', '%b %dth %Y %I:%M',
    # numeric, year first
    '%Y %m %d', '%Y-%m-%d', '%Y/%m/%d',
    '%Y/%m/%d %H:%M', '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M', '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S:%f',
    # dotted day.month.year followed by a time
    '%d.%m.%Y %H:%M', '%d.%m.%Y %H.%M',
    # date and time joined by a literal 'T'
    '%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S', '%Y-%m-%dT%H:%M:%S.%f', '%Y-%m-%dT%H:%M',
    # month name, day and year, the word "at", then a time
    '%b %d %Y at %H:%M', '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M', '%B %d %Y at %H:%M:%S',
)

# The shared formats plus the numeric ones that put the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# The shared formats plus the numeric ones that put the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1782
# Matches text of the form }('<payload>',<int>,<int>,'<words>'.split('|')
# and captures, in order: the payload, the two integers and the '|'-joined
# word string.
# NOTE(review): presumably the argument list of "packed" JavaScript - confirm
# against the code that uses this pattern.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script> element whose type attribute is application/ld+json
# (optionally quoted, with the same quote on both sides) and captures its body
# in the named group json_ld. Case-insensitive; '.' also matches newlines.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1785
1786
def preferredencoding():
    """Get preferred encoding.

    Asks locale.getpreferredencoding() for the system encoding and checks
    that it is usable by encoding a short probe string with it. If either
    step fails for any reason, 'UTF-8' is returned instead.
    """
    try:
        encoding = locale.getpreferredencoding()
        # The reported name may be unknown to the codec machinery
        'TEST'.encode(encoding)
        return encoding
    except Exception:
        return 'UTF-8'
1800
1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn (same
    directory, fn's base name as prefix, '.tmp' as suffix) and then renamed
    onto fn. Objects json cannot serialise are written as their repr().
    If anything fails, the temporary file is removed (best effort) and the
    exception is re-raised.
    """

    fn = encodeFilename(fn)
    base_name = os.path.basename(fn)
    dir_name = os.path.dirname(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use unicode objects
        fs_encoding = get_filesystem_encoding()
        base_name = base_name.decode(fs_encoding)
        dir_name = dir_name.decode(fs_encoding)

    tmp_options = {
        'suffix': '.tmp',
        'prefix': base_name + '.',
        'dir': dir_name,
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tmp_file = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_options))

    try:
        with tmp_file:
            json.dump(obj, tmp_file, default=repr)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # Read the current umask (setting it is the only way to query it)
            # and give the file the permissions a regular open() would have
            current_mask = os.umask(0)
            os.umask(current_mask)
            os.chmod(tmp_file.name, 0o666 & ~current_mask)
        except OSError:
            pass
        os.rename(tmp_file.name, fn)
    except Exception:
        try:
            os.remove(tmp_file.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        Returns the first element found by xpath below node that has the
        attribute key, or None if there is none. When val is given, the
        attribute must additionally be equal to val.
        key may only consist of ASCII letters, '_' and '-'.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            return node.find(xpath + '[@%s]' % key)
        val_text = '%s' % (val,)
        if "'" not in val_text:
            return node.find(xpath + "[@%s='%s']" % (key, val_text))
        # A single quote cannot be escaped inside the quoted predicate value
        # (the expression would be rejected as an invalid predicate), so
        # select on the presence of the attribute and compare the value here
        for candidate in node.findall(xpath + '[@%s]' % key):
            if candidate.attrib.get(key) == val_text:
                return candidate
        return None
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # Older interpreters: filter the xpath matches by hand instead of
        # relying on attribute predicates
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None
1876
# On Python 2.6 the xml.etree.ElementTree.Element methods don't support
# the namespaces parameter
1879
1880
def xpath_with_ns(path, ns_map):
    """Expand the namespace prefixes of path using ns_map.

    path is split on '/'; every step of the form 'prefix:tag' becomes
    '{uri}tag' with uri = ns_map[prefix], while steps without a colon are
    kept as they are. Raises KeyError for a prefix missing from ns_map.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
1891
1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node found by xpath.

    xpath is either a single path string or an iterable of path strings
    that are tried in order until one of them finds an element.

    If nothing is found: default is returned when one was supplied;
    otherwise ExtractorError is raised when fatal is true (name, or xpath
    when name is None, is used in the message); otherwise None is returned.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start out as "not found" so that an empty iterable of paths is
        # treated like any other miss instead of leaving n unbound
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element that xpath_element() finds.

    Whatever xpath_element() returns for a miss (None or default) is passed
    through. If the element exists but has no text, default is returned when
    one was supplied; otherwise ExtractorError is raised when fatal is true;
    otherwise None is returned.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element

    text = element.text
    if text is not None:
        return text
    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    name = xpath if name is None else name
    raise ExtractorError('Could not find XML element\'s text %s' % name)
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return the value of attribute key of the element found by find_xpath_attr().

    If there is no such element, default is returned when one was supplied;
    otherwise ExtractorError is raised when fatal is true (name, or
    'xpath[@key]' when name is None, is used in the message); otherwise
    None is returned.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    if name is None:
        name = '%s[@%s]' % (xpath, key)
    raise ExtractorError('Could not find XML attribute %s' % name)
1942
1943
def get_element_by_id(id, html):
    """Return the content of the first tag in html whose id attribute equals id, or None"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
1947
1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag in html that has the class class_name, or None"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag in html with the specified attribute value, or None

    All arguments are handed to get_elements_by_attribute() unchanged.
    """
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # A class attribute holds whitespace-separated names, so class_name must
    # be delimited by whitespace or by the start/end of the attribute value
    # (a quote, or '=' / '>' for unquoted values). A plain \b would also
    # accept "foo" inside "foo-bar", since '-' counts as a word boundary.
    return get_elements_by_attribute(
        'class', r'[^\'"]*(?<=[\'"\s=])%s(?=[\'"\s>])[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case
    it is inserted into the search pattern as a regular expression. Each
    content string is passed through unescapeHTML() before it is returned.
    """

    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for mobj in re.finditer(element_re, html):
        content = mobj.group('content')

        # Content that begins with a quote loses its first and last character
        if content.startswith(('"', "'")):
            content = content[1:-1]

        contents.append(unescapeHTML(content))

    return contents
1990
1991
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently handled start tag; stays empty
        # until handle_starttag() has been called
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Replaces (rather than merges into) whatever an earlier start tag
        # stored. attrs is converted with dict() as is - presumably the
        # (name, value) pairs delivered by the parser base class.
        self.attrs = dict(attrs)
2001
2002
def extract_attributes(html_element):
    """Decode the attributes of an HTML element into a dictionary.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    the result is
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    The string is fed to an HTMLAttributeParser and the attributes it
    collected are returned; a parse error raised on the way is ignored.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was gathered before the error is still returned
        pass
    return attr_parser.attrs
2027
2028
def clean_html(html):
    """Clean an HTML snippet into a readable string

    Newlines of the input become spaces, <br> tags and </p><p> boundaries
    become newlines, all remaining tags are dropped, HTML entities are
    decoded with unescapeHTML() and the result is stripped.
    None is passed through unchanged.
    """

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    text = html.replace('\n', ' ')
    substitutions = (
        # Newline vs <br />
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
2044
2045
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    '-' stands for standard output (its binary buffer when it has one). Any
    other name is opened with open_mode. If opening fails with an
    IOError/OSError other than EACCES, the name is run through
    sanitize_path() and, if that changed it, opened again; an error from
    that second attempt propagates to the caller. Otherwise the original
    error is re-raised.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != '-':
            return (open(encodeFilename(filename), open_mode), filename)
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        fallback_name = sanitize_path(filename)
        if fallback_name == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(fallback_name), open_mode), fallback_name)
2076
2077
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when email.utils.parsedate_tz() cannot parse timestr.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2085
2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.

    Characters are replaced one by one: '?' and control characters are
    dropped, '"' becomes "'", ':' becomes ' -' and any of \\/|*<> becomes
    '_'; colons inside digit groups such as 1:23:45 are turned into '_'
    beforehand. With restricted set, a stricter subset of characters is
    allowed: accented characters are replaced via ACCENT_CHARS, and
    whitespace, a set of punctuation characters and everything non-ASCII
    become '_' ('"' is dropped and ':' becomes '_-').

    Unless is_id is set (for IDs that should be kept if possible), runs of
    '_' are collapsed, surrounding '_' and leading '.' are removed, a
    leading '-' becomes '_' and an empty result becomes '_'.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        codepoint = ord(char)
        if char == '?' or codepoint < 32 or codepoint == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or codepoint > 127):
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    return result.lstrip('.') or '_'
2128
2129
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On other platforms s is returned unchanged unless force is true, in
    which case it is processed the same way (without any drive handling).
    The path is normalized and, in every component other than '.' and '..',
    each of the characters /<>:"|\\?* as well as a trailing whitespace
    character or dot is replaced with '#'.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s.startswith(os.path.sep):
        # Keep absolute paths absolute; startswith() (rather than s[0]) so
        # that an empty path does not raise IndexError
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2153
2154
def sanitize_url(url):
    """Repair a few well-known defects in a URL string.

    Protocol-less URLs ('//host/...') are given the `http:` scheme and a
    handful of misspelled schemes are corrected. Anything else is returned
    unchanged.
    """
    if url.startswith('//'):
        # Mitigates the number of unwanted failures due to missing protocol
        return 'http:%s' % url
    # Common typos seen so far, as (pattern, replacement) pairs
    scheme_typos = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for pattern, replacement in scheme_typos:
        if re.match(pattern, url) is None:
            continue
        return re.sub(pattern, replacement, url)
    return url
2171
2172
def extract_basic_auth(url):
    """Split embedded credentials off a URL.

    @param url  A URL that may contain `user[:password]@` in its netloc
    @returns    A tuple (url, auth_header). If the URL carries no username it
                is returned unchanged together with None; otherwise the URL
                is rebuilt without the credentials and auth_header is the
                matching 'Basic ...' Authorization header value
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    hostname = parts.hostname
    if hostname and ':' in hostname:
        # `hostname` has the brackets of an IPv6 literal stripped; without
        # them the host could not be told apart from the port any more
        hostname = '[%s]' % hostname
    netloc = hostname if parts.port is None else '%s:%d' % (hostname, parts.port)
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
2183
2184
def sanitized_Request(url, *args, **kwargs):
    """Build a Request for `url` after sanitizing and escaping it.

    Credentials embedded in the URL are removed from it and sent as an
    'Authorization' header instead. The header is written into the headers
    mapping given by the caller (second extra positional argument or the
    `headers` keyword), or into a new one if none was given.
    """
    cleaned_url = escape_url(sanitize_url(url))
    cleaned_url, authorization = extract_basic_auth(cleaned_url)
    if authorization is not None:
        if len(args) >= 2:
            request_headers = args[1]
        else:
            request_headers = kwargs.setdefault('headers', {})
        request_headers['Authorization'] = authorization
    return compat_urllib_request.Request(cleaned_url, *args, **kwargs)
2191
2192
def expand_path(s):
    """Return `s` with ~ and shell variables expanded (in that order)"""
    with_home_expanded = compat_expanduser(s)
    return os.path.expandvars(with_home_expanded)
2196
2197
def orderedSet(iterable):
    """ Return a list of the distinct elements of iterable, in order of first appearance """
    unique = []
    for item in iterable:
        # Membership is tested on the list so that unhashable items work too
        if item in unique:
            continue
        unique.append(item)
    return unique
2205
2206
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    @param entity_with_semicolon  The entity without its leading '&' but
                                  including the trailing ';' (e.g. 'amp;')
    @returns  The decoded text, or the literal '&...;' representation if the
              entity is unknown or its number is not a valid code point
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # ValueError: the number is beyond the valid code point range.
            # OverflowError: it is so large that it does not even fit the
            # integer type the character conversion works with.
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2236
2237
def unescapeHTML(s):
    """Replace the HTML entities in `s` with the text they stand for.

    None is passed through unchanged; anything else has to be a text string.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        # Group 1 is the entity without '&' but with its ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', decode_entity, s)
2245
2246
def escapeHTML(text):
    """Escape the characters &, <, >, " and ' in `text` as HTML entities"""
    # '&' has to come first so that the entities inserted by the later
    # replacements are not escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for raw, entity in replacements:
        text = text.replace(raw, entity)
    return text
2256
2257
def process_communicate_or_kill(p, *args, **kwargs):
    """Call p.communicate(), killing and reaping the process if that is interrupted.

    All further arguments are handed to communicate() and its result is
    returned. Whatever interrupted the call is re-raised after the cleanup.
    """
    try:
        result = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return result
2265
2266
def get_subprocess_encoding():
    """Return the name of the encoding to use for subprocess calls.

    The locale's preferred encoding is used on Windows (major version 5 and
    up), the filesystem encoding everywhere else. 'utf-8' is the fallback if
    that yields None.
    """
    # For subprocess calls, encode with locale encoding
    # Refer to http://stackoverflow.com/a/9951851/35070
    use_locale_encoding = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    encoding = preferredencoding() if use_locale_encoding else sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2277
2278
def encodeFilename(s, for_subprocess=False):
    """
    Convert a filename to the form expected by the file/subprocess APIs

    @param s               The name of the file (a text string)
    @param for_subprocess  The name is going to be passed to a subprocess
    @returns               `s` itself wherever text strings can be used
                           directly, otherwise `s` encoded with the
                           subprocess encoding (unencodable characters are
                           dropped)
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    uses_unicode_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if uses_unicode_api or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2301
2302
def decodeFilename(b, for_subprocess=False):
    """Turn a byte-string filename into text.

    On Python 3, and for anything that is not a byte string, the argument is
    returned as is. Otherwise it is decoded with the subprocess encoding,
    ignoring undecodable bytes. `for_subprocess` is accepted but not used.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2312
2313
def encodeArgument(s):
    """Encode a command line argument the way encodeFilename() does for subprocesses"""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings: these are taken to be ASCII.
    # TODO: fail here instead once all post processors have been fixed
    return encodeFilename(s.decode('ascii'), True)
2321
2322
def decodeArgument(b):
    """Decode a command line argument; shorthand for decodeFilename(b, True)"""
    return decodeFilename(b, for_subprocess=True)
2325
2326
def decodeOption(optval):
    """Return an option value as text.

    None is passed through. Byte strings are decoded with the preferred
    encoding; the result must be a text string.
    """
    if optval is None:
        return optval

    decoded = optval
    if isinstance(decoded, bytes):
        decoded = decoded.decode(preferredencoding())

    assert isinstance(decoded, compat_str)
    return decoded
2335
2336
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration given in seconds as [H:]M:SS, or as plain seconds.

    @param secs   The duration in seconds (integer or float)
    @param delim  The separator put between hours, minutes and seconds
    @param msec   Append the milliseconds as a '.mmm' suffix
    @returns      'H:MM:SS' from one hour on, 'M:SS' from one minute on and
                  the bare number of seconds below that
    """
    # '>=' so that exactly one hour/minute is '1:00:00'/'1:00', not '60:00'/'60'
    if secs >= 3600:
        ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        ret = '%d' % secs
    if not msec:
        return ret
    # secs % 1 is only the fraction of a second and has to be scaled to
    # milliseconds. Capped at 999 so that rounding can never produce a fourth
    # digit.
    millis = min(int(round((secs % 1) * 1000)), 999)
    return '%s.%03d' % (ret, millis)
2345
2346
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler with an SSL setup matching `params`.

    Certificate verification is switched off if params['nocheckcertificate']
    is set. Further keyword arguments are handed to the handler.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        default_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            default_context.check_hostname = False
            default_context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=default_context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    legacy_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        legacy_context.verify_mode = ssl.CERT_NONE
    else:
        legacy_context.verify_mode = ssl.CERT_REQUIRED
    legacy_context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=legacy_context, **kwargs)
2370
2371
def bug_reports_message(before=';'):
    """Return the standard request to report a bug.

    @param before  The text the message is going to be appended to. It is
                   returned in front of the message, separated by a space.
                   The message starts with a capital letter if `before` is
                   empty or ends a sentence.
    """
    update_cmd = (
        'type  yt-dlp -U  to update' if ytdl_is_updateable()
        else 'see  https://github.com/yt-dlp/yt-dlp  on how to update')
    msg = ''.join((
        'please report this issue on  https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    ))

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    prefix = before + ' ' if before else ''
    return prefix + msg
2386
2387
class YoutubeDLError(Exception):
    """Common base class of the YoutubeDL exceptions."""
2391
2392
# Exception types that indicate a network problem
network_exceptions = (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error)
if hasattr(ssl, 'CertificateError'):
    network_exceptions += (ssl.CertificateError,)
2397
2398
class ExtractorError(YoutubeDLError):
    """Raised when something goes wrong during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ Build the error and its final message.

        tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        cause, if set, is mentioned at the end of the message.
        video_id, if given, is put in front of the message.
        """

        # An error raised while one of the network exceptions is being
        # handled counts as expected (exact type match only)
        handled_type = sys.exc_info()[0]
        if handled_type in network_exceptions:
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors ask the user to file a bug report
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, None if there is none."""
        tb = self.traceback
        return None if tb is None else ''.join(traceback.format_tb(tb))
2426
2427
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported; see `url`."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2433
2434
class RegexNotFoundError(ExtractorError):
    """Extraction error signalling that a regular expression did not match"""
2438
2439
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    May be raised if a website does not make a video available in the
    geographic location of the user. It is always an expected error.
    `countries` holds whatever was passed as the countries argument.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        # The message as given, without anything added by ExtractorError
        self.msg = msg
2451
2452
class DownloadError(YoutubeDLError):
    """Download Error exception.

    FileDownloader objects may raise this if they are not configured to
    continue on errors. It carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        YoutubeDLError.__init__(self, msg)
        self.exc_info = exc_info
2465
2466
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    Thrown by YoutubeDL when a requested entry cannot be found in the
    info_dict of the playlist.
    """
2474
2475
class SameFileError(YoutubeDLError):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that several downloads
    would end up in the same file on disk.
    """
2483
2484
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    The .run() method of a PostProcessor may raise this to indicate an error
    in the postprocessing task. The message is also available as `msg`.
    """

    def __init__(self, msg):
        YoutubeDLError.__init__(self, msg)
        self.msg = msg
2495
2496
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached (presumably for --break-on-existing; confirm at the raise site). """
    pass
2500
2501
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached (presumably for --break-on-reject; confirm at the raise site). """
    pass
2505
2506
class ThrottledDownload(YoutubeDLError):
    """ The download speed is below --throttled-rate. """
2510
2511
class MaxDownloadsReached(YoutubeDLError):
    """ The limit given with --max-downloads has been reached. """
2515
2516
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when the format requested for a video is not available for
    that video.
    """
2524
2525
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    FileDownloader objects may raise this when a downloaded file is smaller
    than what the server announced first, which indicates that the
    connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded, self.expected = downloaded, expected
2541
2542
class XAttrMetadataError(YoutubeDLError):
    """Error with a `code` and `msg` that are classified into a `reason`.

    `reason` is 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg: either of them may identify the problem
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg
            or 'Disk quota exceeded' in msg)
        if out_of_space:
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
2557
2558
class XAttrUnavailableError(YoutubeDLError):
    # NOTE(review): presumably raised when extended attributes cannot be
    # used at all - confirm against the code that raises it
    pass
2561
2562
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate `http_class` and make it honour the 'source_address' param.

    @param ydl_handler  The handler whose `_params` are consulted
    @param http_class   The connection class to instantiate
    @param is_https     Whether the connection has to be wrapped in SSL; only
                        used by the replacement connect() for Python 2.6
    @param args, kwargs Passed on to `http_class`
    @returns            The connection object. Unless a 'source_address' is
                        configured it is left exactly as constructed
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: the `source_address` parameter shadows the outer variable;
            # here it is the (ip, port) tuple to bind to, not the bare IP
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source IP means IPv4, anything else is taken as IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            # Keep only the remote addresses of the same family as the source
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try the candidates in turn and return the first socket that connects
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and move on to the next candidate
                    err = _
                    if sock is not None:
                        sock.close()
            # Nothing connected: report the last failure, if there was any
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        # Only replace the hook on connection objects that have one
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 is used for the local end of the connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address attribute: replace connect() altogether
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2626
2627
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to `headers`.

    Without the marker, `headers` itself is returned. With it, a new dict is
    returned that lacks both the marker and any Accept-Encoding header
    (matched case-insensitively); `headers` is not modified.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    return dict(
        (name, value) for name, value in headers.items()
        if name != 'Youtubedl-no-compression' and name.lower() != 'accept-encoding')
2636
2637
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Keep `params` as `self._params`; everything else goes to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open `req` over a plain HTTP connection, via SOCKS if requested.

        A SOCKS proxy is requested with the internal 'Ytdl-socks-proxy'
        header, which is removed from the request here.
        """
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        # The partial is what do_open() calls to create the connection
        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate-encoded `data`; empty data is returned as is.

        A raw deflate stream is tried first, a zlib-wrapped one second.
        """
        if not data:
            return data
        try:
            # Negative wbits: raw stream without zlib header and checksum
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Prepare an outgoing request and return it (possibly a new object).

        Escapes the URL, adds the missing standard headers and processes the
        internal 'Youtubedl-no-compression' header.
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Post-process a response and return it (possibly a new object).

        gzip and deflate bodies are decompressed and the Location header of
        redirects is percent-encoded.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with up to 1023 trailing bytes cut off, one more each time
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No attempt succeeded: report the original error
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            # The body is no longer compressed
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS requests and responses get exactly the same processing
    https_request = http_request
    https_response = http_response
2761
2762
def make_socks_conn_class(base_class, socks_proxy):
    """Create a subclass of `base_class` that connects through a SOCKS proxy.

    @param base_class   HTTPConnection, HTTPSConnection or a subclass of one
    @param socks_proxy  The proxy URL. Its scheme selects the protocol
                        (socks5, socks4a, socks4 or socks, the latter two
                        both meaning SOCKS4); the port defaults to 1080;
                        username and password are optional
    @returns            The new connection class
    @raises ValueError  If the scheme of the proxy URL is not a supported one
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Fail with a clear message rather than with an UnboundLocalError
        # on socks_type further down
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Missing credentials (None or '') are passed on unchanged
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            """Connect to the target host through the proxy."""
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only numeric timeouts are applied to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2804
2805
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPSHandler that creates its connections with _create_http_connection().

    @param params            Stored as `self._params`
    @param https_conn_class  The connection class to use, HTTPSConnection
                             by default
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._params = params
        if https_conn_class:
            self._https_conn_class = https_conn_class
        else:
            self._https_conn_class = compat_http_client.HTTPSConnection

    def https_open(self, req):
        """Open `req` over HTTPS, via SOCKS if the request asks for it."""
        # Hand on the SSL settings that exist as attributes of this handler:
        # _context (python > 2.6) and _check_hostname (python 3.x)
        open_kwargs = {}
        for option in ('context', 'check_hostname'):
            attribute = '_' + option
            if hasattr(self, attribute):
                open_kwargs[option] = getattr(self, attribute)

        conn_class = self._https_conn_class
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            # The header is internal and must not be sent to the server
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
2829
2830
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar that reads and writes cookie files as UTF-8, skips
    invalid entries when loading and handles `expires` values of 0.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields of an entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        Without a filename, self.filename is used; ValueError is raised if
        that is not set either.
        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if cookie.discard and not ignore_discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                expires = '' if cookie.expires is None else compat_str(cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                fields = (cookie.domain, initial_dot, cookie.path,
                          secure, expires, name, value)
                f.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Without a filename, self.filename is used; ValueError is raised if
        that is not set either. Invalid entries are skipped with a warning.
        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line to load; raises LoadError for an invalid entry
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            fields = line.split('\t')
            if len(fields) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(fields))
            entry = self._CookieFileEntry(*fields)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % entry.expires_at)
            return line

        # The accepted lines are collected in memory and loaded from there
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    prepared = prepare_line(line)
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                else:
                    cf.write(prepared)
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2946                 cookie.discard = True
2947
2948
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor whose HTTP and HTTPS hooks share one implementation."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # On Python 2 a Set-Cookie header containing non-ASCII characters
        # makes the following HTTP request fail (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # The workaround below percent-encodes the Set-Cookie header before
        # HTTPCookieProcessor gets to see it; it is currently disabled.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS traffic goes through exactly the same hooks as HTTP traffic
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2971
2972
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # All redirect status codes are served by the stock 302 implementation
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Build the follow-up Request for a redirect response.

        Called by the http_error_30x methods. Returns a new Request for
        newurl when the (status code, request method) combination may be
        redirected, otherwise raises HTTPError. The new request is created
        without a body and without the content-length/content-type headers
        of the original request.
        """
        method = req.get_method()
        redirectable = (
            (code in (301, 302, 303, 307, 308) and method in ("GET", "HEAD"))
            or (code in (301, 302, 303) and method == "POST"))
        if not redirectable:
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # RFC 2616 says a 301 or 302 answer to a POST MUST NOT be followed
        # without confirmation from the user. In practice essentially every
        # client redirects anyway, so this handler does too.

        # urlh.geturl() on python 2 sometimes hands back the redirect URL as
        # a byte string; force it to unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Tolerate URIs containing a space. http_error_302() already does a
        # more complete encoding; this is kept for other callers.
        newurl = newurl.replace(' ', '%20')

        content_headers = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        forwarded_headers = dict(
            (name, value) for name, value in req.headers.items()
            if name.lower() not in content_headers)
        return compat_urllib_request.Request(
            newurl, headers=forwarded_headers,
            origin_req_host=req.origin_req_host, unverifiable=True)
3028
3029
def extract_timezone(date_str):
    """Split a trailing 'Z' or numeric UTC offset off a date string.

    Returns a (timedelta, remaining_string) tuple. The timedelta is zero
    when the suffix is 'Z' or when no suffix is recognised; in the latter
    case the string is returned unchanged.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(tz_match.group('tz'))]
    sign_char = tz_match.group('sign')
    if not sign_char:
        # Plain 'Z' suffix, i.e. UTC
        return datetime.timedelta(), remainder

    direction = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(tz_match.group('hours')),
        minutes=direction * int(tz_match.group('minutes')))
    return offset, remainder
3046
3047
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    delimiter is the separator between the date and the time part.
    timezone is a timedelta giving the UTC offset; when omitted it is taken
    from the string itself via extract_timezone. Returns None for a None
    input or when the string does not match the expected layout.
    """
    if date_str is None:
        return None

    # Fractional seconds are not part of the strptime format used below
    without_fraction = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    try:
        layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_dt = datetime.datetime.strptime(without_fraction, layout) - timezone
        return calendar.timegm(utc_dt.timetuple())
    except ValueError:
        return None
3065
3066
def date_formats(day_first=True):
    """Return the day-first date formats, or the month-first ones if day_first is false"""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3069
3070
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every format is tried; a later matching format overrides an earlier one
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
3097
3098
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    Commas and pipes, AM/PM markers and trailing alphabetic timezone names
    are removed, a numeric UTC offset is split off with extract_timezone,
    and the remainder is tried against every format returned by
    date_formats(day_first). If none matches, email.utils.parsedate_tz is
    used as a fallback.

    Returns None when date_str is None or nothing could be parsed.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # NOTE(review): 12 hours are added whenever 'PM' occurs anywhere in the
    # string, which presumably shifts '12:xx PM' times too far - confirm.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The numeric UTC offset was already stripped from date_str above, so
        # it has to be applied here as well, just like in the strptime branch.
        tz_seconds = timezone.days * 86400 + timezone.seconds
        return calendar.timegm(timetuple) + pm_delta * 3600 - tz_seconds
3130
3131
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL, or return default_ext.

    Only the part before the first '?' is considered. The text after the
    last '.' is used when it is purely alphanumeric, or when it is a known
    extension followed by slashes.
    """
    if url is None or '.' not in url:
        return default_ext
    before_query = url.partition('?')[0]
    guess = before_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3143
3144
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'"""
    sub_ext = '.'.join((sub_lang, sub_format))
    return replace_extension(filename, sub_ext, expected_real_ext)
3147
3148
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    'yesterday' is accepted as well.

    format: strptime format used to parse an absolute date
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'

    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if relative is None:
        # Plain absolute date
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    # The part in front of the sign is resolved recursively
    base = datetime_from_str(relative.group('start'), precision, format)
    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')

    if unit in ('month', 'year'):
        months = amount * 12 if unit == 'year' else amount
        shifted = datetime_add_months(base, months)
        # Months and years are rounded to whole days in auto mode
        unit = 'day'
    else:
        if unit == 'week':
            unit, amount = 'day', amount * 7
        shifted = base + datetime.timedelta(**{unit + 's': amount})

    if auto_precision:
        return datetime_round(shifted, unit)
    return shifted
3189
3190
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: strptime format used to parse an absolute date
    """
    resolved = datetime_from_str(date_str, precision='microsecond', format=format)
    return resolved.date()
3199
3200
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the resulting month.
    """
    # Work with a zero-based month index so divmod yields the year carry
    year_carry, month_index = divmod(dt.month + months - 1, 12)
    target_year = dt.year + year_carry
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return dt.replace(year=target_year, month=target_month, day=min(dt.day, last_day))
3208
3209
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision

    precision is one of microsecond|second|minute|hour|day; with
    'microsecond' the object is returned unchanged.
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    timestamp = calendar.timegm(dt.timetuple())
    step = unit_seconds[precision]
    # Adding half a step before flooring rounds to the nearest multiple
    rounded = ((timestamp + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
3226
3227
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not in that format are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '%s-%s-%s' % parts.groups()
3236
3237
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing start/end leaves the range open on that side. ValueError
        is raised when start lies after end.
        """
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range

        date may be a date, a datetime (only its date part is considered)
        or a string accepted by date_from_str.
        """
        if isinstance(date, datetime.datetime):
            # datetime is a subclass of date but cannot be compared with a
            # plain date, so reduce it to its date part first
            date = date.date()
        elif not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3267
3268
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Decode byte strings using the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3277
3278
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The special method is the WriteConsoleW function of kernel32, used only
    when out is file descriptor 1 or 2 and that handle is an actual
    console. Raises OSError when WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for an invalid handle, for a handle that is not a character
        # device, or for one GetConsoleMode fails on
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters that stop right before the
    # next non-BMP character. A count of 0 means the string starts with a
    # non-BMP character, which is written on its own with a length of 2.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever was actually written
            s = s[written.value:]
    return True
3352
3353
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr by default) and flush it.

    On Windows, when no encoding is requested, the console API is tried
    first. Otherwise the text is encoded (dropping unencodable characters)
    for binary streams and for streams exposing a `buffer`, and written
    as-is to anything else.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    try_console = sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno')
    if try_console and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        target_encoding = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(target_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()
3374
3375
def bytes_to_intlist(bs):
    """Convert a byte string (or sequence of ints) to a list of ints"""
    if not bs:
        return []
    # Python 3 bytes yield ints when indexed, Python 2 strings yield characters
    if isinstance(bs[0], int):
        return list(bs)
    return list(map(ord, bs))
3383
3384
def intlist_to_bytes(xs):
    """Pack a sequence of ints (one unsigned byte each) into a byte string"""
    if not xs:
        return b''
    layout = '%dB' % len(xs)
    return compat_struct_pack(layout, *xs)
3389
3390
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on Windows, fcntl.flock elsewhere, and
# stubs raising IOError where fcntl is unavailable.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        """ctypes layout of the OVERLAPPED structure passed to LockFileEx/UnlockFileEx"""
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high part of the byte count to lock, used to cover the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the file object f starting at offset 0; raises OSError on failure."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object because _unlock_file passes it to UnlockFileEx
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags: 0x2 (LOCKFILE_EXCLUSIVE_LOCK) for an exclusive lock, 0 for a shared one
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file on f; raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock on f."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock held on f."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            """Always raises IOError: locking is unavailable without fcntl."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always raises IOError: locking is unavailable without fcntl."""
            raise IOError(UNSUPPORTED_MSG)
3464
3465
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    The file is opened in the constructor; the lock is taken on entering
    the `with` block and released (and the file closed) on leaving it.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Readers may share the lock, everything else needs it exclusively
        needs_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, needs_exclusive)
        except IOError:
            # Do not leak the open file when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    # The remaining methods simply delegate to the underlying file object

    def read(self, *args):
        return self.f.read(*args)

    def write(self, *args):
        return self.f.write(*args)

    def __iter__(self):
        return iter(self.f)
3495
3496
def get_filesystem_encoding():
    """Return the filesystem encoding, falling back to 'utf-8' if it is unknown"""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3500
3501
def shell_quote(args):
    """Quote every argument for the shell and join them with spaces"""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        if isinstance(arg, bytes):
            return arg.decode(encoding)
        return arg

    return ' '.join([compat_shlex_quote(as_text(a)) for a in args])
3511
3512
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded into the URL fragment under the key
    '__youtubedl_smuggle'. If url already carries smuggled data, both are
    merged, with the values already present in the URL taking precedence.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so the dict passed in by the caller is left untouched
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
3521
3522
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (url, data).

    URLs without smuggled data are returned as (smug_url, default).
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3530
3531
def format_bytes(bytes):
    """Format a byte count as a human readable string with a binary suffix.

    Returns 'N/A' for None. Strings are converted with float(). The unit
    is clamped to the range B..YiB, so values below one byte are shown in
    B and values beyond the largest unit are shown in YiB. Negative values
    keep their sign.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    magnitude = abs(bytes)
    if magnitude == 0.0:
        exponent = 0
    else:
        # The logarithm is taken of the absolute value (log is undefined for
        # negative numbers) and clamped to the available suffixes
        exponent = int(math.log(magnitude, 1024.0))
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3544
3545
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' at the start of s using a unit -> multiplier table.

    The number may use ',' or '.' as decimal separator. Returns the number
    multiplied by the unit's factor as an int, or None if s does not start
    with a number followed by one of the units.
    """
    units_re = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re
    found = re.match(pattern, s)
    if found is None:
        return None
    number = float(found.group('num').replace(',', '.'))
    factor = unit_table[found.group('unit')]
    return int(number * factor)
3555
3556
def parse_filesize(s):
    """Parse a human readable file size such as '1.5 MiB' into a number of bytes.

    Returns None for None; unparsable strings yield whatever
    lookup_unit_table returns for them.
    """
    if s is None:
        return None

    # (symbol letter, decimal prefix, binary prefix) in increasing magnitude
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    unit_table = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal = 1000 ** power
        binary = 1024 ** power
        small = letter.lower()
        unit_table[letter + 'iB'] = binary
        unit_table[letter + 'B'] = decimal
        # The lower-case forms are of course incorrect and unofficial,
        # but we support those too
        unit_table[small + 'B'] = binary
        unit_table[letter + 'b'] = decimal
        unit_table[small + 'b'] = decimal
        unit_table[decimal_name + 'bytes'] = decimal
        unit_table[binary_name + 'bytes'] = binary

    return lookup_unit_table(unit_table, s)
3626
3627
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an int.

    Returns None for None.
    """
    if s is None:
        return None

    text = s.strip()

    # Plain number, possibly with thousands separators
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, text)
3647
3648
def parse_resolution(s):
    """Extract width/height information from a string.

    Recognises '<w>x<h>', '<height>p' / '<height>i' and '4k' / '8k'.
    Returns a dict with 'width' and/or 'height', or an empty dict.
    """
    if s is None:
        return {}

    dims = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dims:
        return {
            'width': int(dims.group('w')),
            'height': int(dims.group('h')),
        }

    # (pattern, factor turning the captured number into a height)
    height_patterns = (
        (r'\b(\d+)[pPiI]\b', 1),
        (r'\b([48])[kK]\b', 540),
    )
    for pattern, factor in height_patterns:
        found = re.search(pattern, s)
        if found:
            return {'height': int(found.group(1)) * factor}

    return {}
3669
3670
def parse_bitrate(s):
    """Return the number in front of 'kbps' in s as an int, or None"""
    if not isinstance(s, compat_str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    if not found:
        return None
    return int(found.group(1))
3677
3678
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name

    Month names of another language are used when lang is a key of
    MONTH_NAMES; unknown languages fall back to English. Returns None for
    an unknown name. """

    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        position = month_names.index(name)
    except ValueError:
        return None
    return position + 1
3688
3689
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations, or None if the abbreviation is unknown """

    # An abbreviation is the first three letters of the full month name
    abbreviations = [full_name[:3] for full_name in ENGLISH_MONTH_NAMES]
    try:
        position = abbreviations.index(abbrev)
    except ValueError:
        return None
    return position + 1
3698
3699
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML

    Ampersands that already start one of the predefined entities or a
    numeric character reference of up to four digits are left alone."""
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
3706
3707
def setproctitle(title):
    """Set the process name via prctl(PR_SET_NAME) of libc.so.6.

    Best effort: silently does nothing on Jython, when libc.so.6 cannot be
    loaded or when the loaded library has no prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one byte larger than
    # the title, so the name handed to prctl is always NUL-terminated
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3732
3733
def remove_start(s, start):
    """Return s without the prefix start; s is returned as is if it is None or lacks the prefix"""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3736
3737
def remove_end(s, end):
    """Return s without the suffix end; s is returned as is if it is None or lacks the suffix"""
    if s is None or not s.endswith(end):
        return s
    # An empty suffix must not be sliced off: s[:-0] would be the empty string
    return s[:-len(end)] if end else s
3740
3741
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s"""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3749
3750
def get_domain(url):
    """Return the host part of url without scheme and leading 'www.', or None"""
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
3754
3755
def url_basename(url):
    """Return the last component of the URL's path"""
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
3759
3760
def base_url(url):
    """Return url up to and including the last '/' before any query or fragment"""
    matched = re.match(r'https?://[^?#&]+/', url)
    return matched.group()
3763
3764
def urljoin(base, path):
    """Join path onto base, returning None for unusable input.

    A path that is already absolute or protocol-relative is returned
    unchanged. Otherwise base must be an http(s) or protocol-relative URL.
    Byte strings are decoded as UTF-8.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    base_usable = isinstance(base, compat_str) and re.match(
        r'^(?:https?:)?//', base)
    if not base_usable:
        return None
    return compat_urlparse.urljoin(base, path)
3778
3779
class HEADRequest(compat_urllib_request.Request):
    """Request subclass whose get_method() always reports 'HEAD'."""

    def get_method(self):
        return 'HEAD'
3783
3784
class PUTRequest(compat_urllib_request.Request):
    """Request subclass whose get_method() always reports 'PUT'."""

    def get_method(self):
        return 'PUT'
3788
3789
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, returning default when that is not possible.

    If get_attr is given, the attribute of that name is read from v first.
    The converted value is multiplied by invscale and floor-divided by scale.
    None and the empty string yield default.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3802
3803
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3806
3807
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned as they are; strings have ",", "." and "+"
    removed before conversion. Any other type yields None.
    """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if not isinstance(int_str, compat_str):
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)
3815
3816
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float scaled by invscale / scale.

    Returns default when v is None or cannot be converted.
    """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
3824
3825
def bool_or_none(v, default=None):
    """Return v if it is a bool, otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3828
3829
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace if it is a string, otherwise default."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3832
3833
def url_or_none(url):
    """Return url stripped of whitespace if it looks like a supported URL, else None.

    Accepted are protocol-relative URLs ("//...") and the schemes listed in
    the pattern below (http(s), rtmp/rtsp variants, mms, ftp(s)).
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
3839
3840
def strftime_or_none(timestamp, date_format, default=None):
    """Format timestamp with date_format, returning default on failure.

    Numeric timestamps are read as UNIX timestamps (UTC); strings are
    parsed as YYYYMMDD. Any other type yields default.
    """
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            moment = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            moment = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            return default
        return moment.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3851
3852
def parse_duration(s):
    """Parse a duration string and return it as a number of seconds.

    Three notations are tried in turn: colon-separated clock values
    ("[[[DD:]HH:]MM:]SS[.ms]"), unit-suffixed values (ISO 8601 like
    "PT1H2M3S" or worded like "1 hour 2 min 3 s") and a lone
    "N hours" / "N minutes" value. Returns None for non-string input or
    when none of the notations match.
    """
    if not isinstance(s, compat_basestring):
        return None

    text = s.strip()

    days = hours = mins = secs = ms = None
    clock = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', text)
    if clock:
        days, hours, mins, secs, ms = clock.groups()
    else:
        # Years, months and weeks are accepted by the pattern but not captured
        worded = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', text)
        if worded:
            days, hours, mins, secs, ms = worded.groups()
        else:
            single_unit = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', text)
            if not single_unit:
                return None
            hours, mins = single_unit.groups()

    # Accumulate in the same order as the units are listed so that the
    # floating point result does not depend on how the input was matched
    total = 0
    if secs:
        total += float(secs)
    if mins:
        total += float(mins) * 60
    if hours:
        total += float(hours) * 60 * 60
    if days:
        total += float(days) * 24 * 60 * 60
    if ms:
        total += float(ms)
    return total
3909
3910
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the existing extension of filename.

    When expected_real_ext is given and filename does not end with it,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3917
3918
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of filename with ext.

    When expected_real_ext is given and filename does not end with it,
    ext is appended to the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
3924
3925
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False when launching the binary raises OSError. """
    try:
        command = [exe] + args
        proc = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
3935
3936
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.
    The combined stdout/stderr output is handed to detect_exe_version. """
    try:
        command = [encodeArgument(exe)] + args
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3954
3955
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from the output of an executable.

    version_re must contain one group holding the version; when omitted a
    generic "version X" pattern is used. Returns unrecognized if nothing
    matches.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
3965
3966
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList

    Items are pulled from the iterable only when needed and are kept in an
    internal cache. Anything that has to look at the end of the sequence
    (negative indices, open-ended slices, len(), repr(), reversed access)
    consumes the whole iterable, so never do that with infinite iterables.
    '''

    def __init__(self, iterable):
        self.__iterable = iter(iterable)
        self.__cache = []
        self.__reversed = False

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        # Pull everything that is left and return the cache (in source order)
        self.__cache.extend(self.__iterable)
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map an index of the reversed view onto the cache.
        # None (an open slice bound) stays open
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                # Translate the bounds as given; substituting defaults before
                # translating would turn an open end into a fixed index and
                # drop elements
                idx = slice(
                    self.__reverse_index(idx.start),
                    self.__reverse_index(idx.stop),
                    -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            return self.__exhaust()[idx]

        # Fetch just enough items to cover the highest index requested
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        return self.__cache[idx]

    def __bool__(self):
        try:
            self[-1] if self.__reversed else self[0]
        except IndexError:
            return False
        return True

    def __len__(self):
        self.exhaust()
        return len(self.__cache)

    def reverse(self):
        ''' Toggle reversed access and return self '''
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4043
4044
class PagedList(object):
    """Base class for lists whose entries are fetched page by page.

    Subclasses implement getslice(); len() and non-negative integer
    indexing are built on top of it.
    """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list.

        end=None means "up to the last entry". The defaults mirror the
        subclasses' signatures so that the no-argument call in __len__
        reaches this method instead of failing on missing arguments.
        """
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        # An index past the end yields None rather than raising
        return entries[0] if entries else None


class OnDemandPagedList(PagedList):
    """PagedList that requests pages one after another until a short page is seen.

    pagefunc(pagenum) returns an iterable with the entries of that page and
    pagesize is the number of entries of a full page. With use_cache the
    fetched pages are remembered per page number.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        res = []
        # An empty or inverted range has no entries. Without this check a
        # range ending exactly on a page boundary returned a whole page and
        # an inverted range could run to the end of the list
        if end is not None and end <= start:
            return res

        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offsets of the requested range within the current page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res


class InAdvancePagedList(PagedList):
    """PagedList for sources whose number of pages is known beforehand.

    pagefunc(pagenum) returns an iterable with the entries of that page,
    pagecount is the total number of pages and pagesize the number of
    entries per page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        # An empty or inverted range has no entries (a negative count would
        # otherwise be used as a slice bound below)
        if end is not None and end <= start:
            return res

        start_page = start // self._pagesize
        # Never ask for pages beyond the known page count
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first page needs its leading entries dropped
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    res.extend(page[:only_more])
                    break
            res.extend(page)
        return res
4139
4140
def uppercase_escape(s):
    r"""Replace every \UXXXXXXXX escape sequence in s with the character it denotes."""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns a (text, consumed length) pair
        text, _ = decode(match.group(0))
        return text

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
4147
4148
def lowercase_escape(s):
    r"""Replace every \uXXXX escape sequence in s with the character it denotes."""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns a (text, consumed length) pair
        text, _ = decode(match.group(0))
        return text

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
4155
4156
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2 only: text strings are encoded to UTF-8 before quoting
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4162
4163
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host is IDNA-encoded; every other component is percent-escaped
    escaped = {
        'netloc': parts.netloc.encode('idna').decode('ascii'),
        'path': escape_rfc3986(parts.path),
        'params': escape_rfc3986(parts.params),
        'query': escape_rfc3986(parts.query),
        'fragment': escape_rfc3986(parts.fragment),
    }
    return parts._replace(**escaped).geturl()
4174
4175
def read_batch_urls(batch_fd):
    """Read the URLs listed in batch_fd, one per line, and close it afterwards.

    Blank lines and lines starting with "#", ";" or "]" are skipped, and
    a trailing comment introduced by whitespace followed by "#" is removed.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        without_comment = re.split(r'\s#', url, 1)[0]
        return without_comment.rstrip()

    with contextlib.closing(batch_fd) as fd:
        cleaned = (fixup(line) for line in fd)
        return [url for url in cleaned if url]
4193
4194
def urlencode_postdata(*args, **kargs):
    """URL-encode the given arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
4197
4198
def update_url_query(url, query):
    """Return url with the parameters of query merged into its query string.

    Parameters already present in url are overwritten by those in query.
    url is returned untouched when query is empty.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
4207
4208
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with the given parts replaced.

    headers are merged over the original headers, query is merged into the
    URL, and url/data replace the originals when truthy. The HTTP method
    (HEAD, PUT or the default) and the timeout attribute, if any, are
    carried over.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    payload = data or req.data
    target = update_url_query(url or req.get_full_url(), query)
    request_class = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = request_class(
        target, data=payload, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4227
4228
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns the (body bytes, Content-Type value) pair. Raises ValueError
    if the boundary occurs inside any of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes + b'\r\n'

    chunks = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(delimiter)
        chunks.append(content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
4249
4250
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns the (body, content type) pair. With an explicit boundary a
    ValueError from the encoder is passed on to the caller; random
    boundaries are regenerated until one works.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        out, content_type = _multipart_encode_impl(data, boundary)
        return out, content_type

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            out, content_type = _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
        return out, content_type
4279
4280
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable one of several keys, in d.

    With a list or tuple of keys, the value of the first key that is
    present and not None (and, if skip_false_values is set, not falsy) is
    returned. A single key is looked up with d.get(). default is returned
    when nothing suitable is found.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4289
4290
def try_get(src, getter, expected_type=None):
    """Apply the getter(s) to src and return the first acceptable result.

    Getters raising AttributeError, KeyError, TypeError or IndexError are
    skipped, as are results that are not of expected_type (when given).
    Returns None if no getter produced an acceptable value.
    """
    for get in variadic(getter):
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4300
4301
def merge_dicts(*dicts):
    """Merge dicts into a new dict, earlier dicts taking precedence.

    None values are ignored. A value already merged is only replaced when
    it is an empty string and the newcomer is a non-empty string.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                current = merged[k]
                fills_blank = (
                    isinstance(v, compat_str) and v
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_blank:
                    continue
            merged[k] = v
    return merged
4314
4315
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as compat_str, decoding it with encoding/errors if it is not one already."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4318
4319
# Age limits for US rating labels; parse_age_limit() returns these values
# for an (upper-cased) label found here
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4327
4328
# Age limits for "TV-..." rating labels; parse_age_limit() builds its
# pattern from the part of each key after the "TV-" prefix
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4337
4338
def parse_age_limit(s):
    """Turn an age rating into a numeric age limit, or None if unrecognized.

    Plain ints are accepted when within 0..21. Strings may be a one or two
    digit age (optionally followed by "+"), a key of US_RATINGS or a
    TV parental guideline label such as "TV-14", "TV_MA" or "TVPG".
    """
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    label = s.upper()
    if label in US_RATINGS:
        return US_RATINGS[label]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, label)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
4354
4355
def strip_jsonp(code):
    """Remove a JSONP wrapper (callback name, parentheses, trailing ";" and
    "//" comments) from code and return the payload.

    Input that does not look like a JSONP call is returned unchanged.
    """
    jsonp_call = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_call, r'\g<callback_data>', code)
4364
4365
def js_to_json(code, vars={}):
    """Rewrite a JavaScript object/array literal so that it is closer to JSON.

    Strings are re-quoted with double quotes, bare identifiers are quoted,
    hexadecimal and octal integers become decimal, comments, "!" runs and
    trailing commas are dropped. vars is a dict of var, val pairs to
    substitute for bare identifiers.
    """
    comment_re = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    skip_re = r'\s*(?:{comment})?\s*'.format(comment=comment_re)
    integer_table = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=skip_re), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=skip_re), 8),
    )
    # Escape sequences that have to be rewritten inside string literals
    string_escapes = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_escape(m):
        sequence = m.group(0)
        return string_escapes.get(sequence, sequence)

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token.startswith(('/*', '//', '!')) or token == ',':
            return ""

        if token[0] in ("'", '"'):
            return '"%s"' % re.sub(r'(?s)\\.|"', fix_escape, token[1:-1])

        for regex, base in integer_table:
            im = re.match(regex, token)
            if im:
                number = int(im.group(1), base)
                # A trailing ":" means the integer is used as an object key
                return '"%d":' % number if token.endswith(':') else '%d' % number

        if token in vars:
            return vars[token]

        return '"%s"' % token

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=comment_re, skip=skip_re), fix_kv, code)
4410
4411
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its position in quality_ids,
    or to -1 if the id is not listed. """
    def q(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return q
4420
4421
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types.
# NOTE(review): the non-None values look like the fixed filename suffix used
# for that kind of file - confirm against the code that consumes this table
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4437
# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
#
# Template for a verbose regex matching one %-style format specifier that is
# not escaped as "%%". It has two placeholders: {0} for the pattern of the
# mapping key and {1} for the pattern of the conversion type
# (presumably filled in with str.format by the users of this template)
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?  # mapping key
    (?P<format>
        (?:[#0\-+ ]+)?  # conversion flags (optional)
        (?:\d+)?  # minimum field width (optional)
        (?:\.\d+)?  # precision (optional)
        [hlL]?  # length modifier (optional)
        {1}  # conversion type
    )
'''


# The conversion type characters of %-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4456
4457
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Returns s unchanged if it has at most length characters (or is None).
    Otherwise s is cut so that the result, ellipses included, is exactly
    length characters long. If length leaves no room for any text, only
    (a prefix of) the ellipses is returned. """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    if length <= len(ELLIPSES):
        # A negative slice bound would count from the end of s and produce
        # a result longer than requested
        return ELLIPSES[:max(length, 0)]
    return s[:length - len(ELLIPSES)] + ELLIPSES
4466
4467
def version_tuple(v):
    """Split the version string v at "." and "-" and return its parts as a tuple of ints."""
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
4470
4471
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    A missing version, or one that cannot be parsed, counts as outdated
    only when assume_new is false.
    """
    if not version:
        return not assume_new
    try:
        current, threshold = version_tuple(version), version_tuple(limit)
    except ValueError:
        return not assume_new
    return current < threshold
4479
4480
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # NOTE: this unconditional return makes the function always report False;
    # the zipimporter/frozen check below is never reached
    return False

    from zipimport import zipimporter

    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4488
4489
def args_to_str(args):
    """Get a short string representation for a subprocess command (shell-quoted, space-separated)."""
    quoted = map(compat_shlex_quote, args)
    return ' '.join(quoted)
4493
4494
def error_to_compat_str(err):
    """Return the message of err as a text string."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4502
4503
def mimetype2ext(mt):
    """Return the file extension commonly used for the MIME type mt.

    Parameters (everything from the first ";") are ignored and matching is
    case-insensitive. Full types listed in the first table win; otherwise
    the subtype is translated through the second table, falling back to the
    subtype itself. Returns None if mt is None.
    """
    if mt is None:
        return None

    # Drop parameters such as "; charset=utf-8" and normalise case up front so
    # that the full-type lookup sees the bare type, and a "/" inside a
    # parameter value cannot be mistaken for the type/subtype separator
    mt = mt.partition(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mt)
    if ext is not None:
        return ext

    res = mt.rpartition('/')[2].strip()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4541
4542
def parse_codecs(codecs_str):
    """Split a codecs string (RFC 6381) into video and audio codec.

    Returns a dict with 'vcodec' and 'acodec' ('none' for a missing one).
    If no codec is recognized, exactly two entries are taken as video and
    audio in that order; otherwise an empty dict is returned. Unknown
    codecs produce a warning on stderr.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    split_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        # Only the part before the first "." identifies the codec family
        family = full_codec.split('.')[0]
        if family in VIDEO_CODECS:
            vcodec = vcodec or full_codec
        elif family in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4572
4573
def urlhandle_detect_ext(url_handle):
    """Guess the file extension for a response from its headers.

    The filename of an attachment Content-Disposition header is preferred;
    otherwise the Content-Type header is translated with mimetype2ext.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        attachment = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if attachment:
            ext = determine_ext(attachment.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4586
4587
def encode_data_uri(data, mime_type):
    """Return a base64 "data:" URI carrying the bytes data with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4590
4591
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # Nothing is blocked when no limit is set for the viewer or the
    # content is available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4600
4601
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to a leading byte order mark (UTF-8
    without one) and the result of matching optional whitespace followed
    by "<" is returned: a match object, or None. """

    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    payload, encoding = first_bytes, 'utf-8'
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            payload, encoding = first_bytes[len(bom):], enc
            break
    text = payload.decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
4620
4621
def determine_protocol(info_dict):
    """Return the protocol of a format/info dict.

    An explicit 'protocol' entry wins. Otherwise it is derived from the
    'url' entry: rtmp/mms/rtsp by prefix, m3u8/f4m by file extension and
    finally the URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4642
4643
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    Columns are left-aligned and padded to the widest cell plus extraGap.
    delim adds a row of dashes below the header; hideEmpty drops the
    columns whose data cells are all empty. """

    def column_widths(rows):
        widths = []
        for column in zip(*rows):
            widths.append(max(len(compat_str(cell)) for cell in column))
        return widths

    def keep_columns(row, mask):
        return [cell for (keep, cell) in zip(mask, row) if keep]

    if hideEmpty:
        # A column whose widest data cell has length 0 is empty
        mask = column_widths(data)
        header_row = keep_columns(header_row, mask)
        data = [keep_columns(row, mask) for row in data]

    widths = column_widths([header_row] + data)
    rows = [header_row]
    if delim:
        rows.append(['-' * width for width in widths])
    rows.extend(data)
    # The last column is not padded
    row_format = ' '.join('%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]) + ' %s'
    return '\n'.join(row_format % tuple(row) for row in rows)
4664
4665
4666 def _match_one(filter_part, dct):
4667     COMPARISON_OPERATORS = {
4668         '<': operator.lt,
4669         '<=': operator.le,
4670         '>': operator.gt,
4671         '>=': operator.ge,
4672         '=': operator.eq,
4673         '!=': operator.ne,
4674     }
4675     operator_rex = re.compile(r'''(?x)\s*
4676         (?P<key>[a-z_]+)
4677         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4678         (?:
4679             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4680             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4681             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4682         )
4683         \s*$
4684         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4685     m = operator_rex.search(filter_part)
4686     if m:
4687         op = COMPARISON_OPERATORS[m.group('op')]
4688         actual_value = dct.get(m.group('key'))
4689         if (m.group('quotedstrval') is not None
4690             or m.group('strval') is not None
4691             # If the original field is a string and matching comparisonvalue is
4692             # a number we should respect the origin of the original field
4693             # and process comparison value as a string (see
4694             # https://github.com/ytdl-org/youtube-dl/issues/11082).
4695             or actual_value is not None and m.group('intval') is not None
4696                 and isinstance(actual_value, compat_str)):
4697             if m.group('op') not in ('=', '!='):
4698                 raise ValueError(
4699                     'Operator %s does not support string values!' % m.group('op'))
4700             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4701             quote = m.group('quote')
4702             if quote is not None:
4703                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4704         else:
4705             try:
4706                 comparison_value = int(m.group('intval'))
4707             except ValueError:
4708                 comparison_value = parse_filesize(m.group('intval'))
4709                 if comparison_value is None:
4710                     comparison_value = parse_filesize(m.group('intval') + 'B')
4711                 if comparison_value is None:
4712                     raise ValueError(
4713                         'Invalid integer value %r in filter part %r' % (
4714                             m.group('intval'), filter_part))
4715         if actual_value is None:
4716             return m.group('none_inclusive')
4717         return op(actual_value, comparison_value)
4718
4719     UNARY_OPERATORS = {
4720         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4721         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4722     }
4723     operator_rex = re.compile(r'''(?x)\s*
4724         (?P<op>%s)\s*(?P<key>[a-z_]+)
4725         \s*$
4726         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4727     m = operator_rex.search(filter_part)
4728     if m:
4729         op = UNARY_OPERATORS[m.group('op')]
4730         actual_value = dct.get(m.group('key'))
4731         return op(actual_value)
4732
4733     raise ValueError('Invalid filter part %r' % filter_part)
4734
4735
def match_str(filter_str, dct):
    """ Check a dictionary against a filter written in a simple string syntax

    The filter is split on '&' and every part is handed to _match_one;
    evaluation stops at the first part that does not match.
    Returns True (=passes filter) or False
    """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4741
4742
def match_filter_func(filter_str):
    """ Build a function that checks an info dict against filter_str

    The returned function yields None if the info dict passes the filter and
    a message explaining that the video is skipped otherwise.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Name the video by title, falling back to its id
            name = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (name, filter_str)
        return None
    return _match_func
4751
4752
def parse_dfxp_time_expr(time_expr):
    """ Convert a DFXP time expression to a number of seconds

    Accepted are plain offsets in seconds ('12', '1.5', '1.5s') and clock
    values 'H:MM:SS' whose seconds may carry a fraction introduced by
    '.' or ':'.

    @param time_expr  The time expression (may be empty or None)
    @returns          The time as float, or None if the expression is empty
                      or in neither of the supported formats
    """
    if not time_expr:
        return None

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match is not None:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match is None:
        return None
    hours, minutes, seconds = clock_match.groups()
    # A ':' before the fraction is read as a decimal point
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4764
4765
def srt_subtitles_timecode(seconds):
    """ Format a time in seconds as an SRT timecode (HH:MM:SS,mmm)

    @param seconds  The time in seconds (int or float)
    @returns        The timecode, rounded to the nearest millisecond
    """
    # Round to whole milliseconds first. Splitting the float with % and
    # letting %d truncate loses a millisecond for values such as 1.001 or 5.84
    total_msec = int(round(seconds * 1000))
    secs, msec = divmod(total_msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return '%02d:%02d:%02d,%03d' % (hrs, mins, secs, msec)
4768
4769
def dfxp2srt(dfxp_data):
    '''
    Convert a DFXP/TTML subtitle document to SRT

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no paragraph (p) element
    '''
    # Old namespace URIs are rewritten to the current ones before parsing so
    # that a single set of xpath expressions is sufficient
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # Style id -> resolved properties, and the style inherited from body/div
    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        """ XMLParser target that renders one paragraph as SRT text """

        def __init__(self):
            # State is kept per instance; lists defined on the class would be
            # shared by the parsers of all paragraphs
            self._out = ''
            # Both stacks get exactly one entry per open (non-br) element
            self._unclosed_elements = []
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
                return

            # Effective style: defaults, then the referenced style, then the
            # styling attributes set on the element itself
            style = {}
            element_style_id = attrib.get('style')
            if default_style:
                style.update(default_style)
            if element_style_id:
                style.update(styles.get(element_style_id, {}))
            for prop in SUPPORTED_STYLING:
                prop_val = attrib.get(_x('tts:' + prop))
                if prop_val:
                    style[prop] = prop_val

            inherited_style = self._applied_styles[-1] if self._applied_styles else {}
            unclosed_elements = []
            font = ''
            for k, v in sorted(style.items()):
                # Already in effect through an enclosing element
                if inherited_style.get(k) == v:
                    continue
                if k == 'color':
                    font += ' color="%s"' % v
                elif k == 'fontSize':
                    font += ' size="%s"' % v
                elif k == 'fontFamily':
                    font += ' face="%s"' % v
                elif k == 'fontWeight' and v == 'bold':
                    self._out += '<b>'
                    unclosed_elements.append('b')
                elif k == 'fontStyle' and v == 'italic':
                    self._out += '<i>'
                    unclosed_elements.append('i')
                elif k == 'textDecoration' and v == 'underline':
                    self._out += '<u>'
                    unclosed_elements.append('u')
            if font:
                self._out += '<font' + font + '>'
                unclosed_elements.append('font')

            applied_style = dict(inherited_style)
            applied_style.update(style)
            # Always push, even if no tag was emitted, so that end() can pop
            # unconditionally and no style outlives its element
            self._applied_styles.append(applied_style)
            self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                for element in reversed(self._unclosed_elements.pop()):
                    self._out += '</%s>' % element
                self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the node and replay it through the target above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # A style may inherit from a style declared later in the document, so the
    # style elements are swept repeatedly until all of them are resolved
    while True:
        repeat = False
        resolved_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            else:
                # Register the style even if it sets no supported property;
                # otherwise styles inheriting from it could never be resolved
                styles.setdefault(style_id, {})
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Stop when nothing is pending, or when a sweep resolved nothing new:
        # the remaining parents are missing or refer to each other, and
        # sweeping again would never terminate
        if not repeat or len(styles) == resolved_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for index, para in enumerate(paras, 1):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        # Paragraphs without usable timing are left out
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4932
4933
def cli_option(params, command_option, param):
    """ Build the command line arguments for an option that takes a value

    @param params          Dictionary with the parameters
    @param command_option  The option as it appears on the command line
    @param param           Key of the value in params
    @returns               [command_option, value] with the value converted
                           to text, or [] if the value is missing or None
    """
    value = params.get(param)
    if value is None:
        return []
    # Convert every value that is present; falsy ones such as 0 must not end
    # up in the argument list as non-strings
    return [command_option, compat_str(value)]
4939
4940
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """ Build the command line arguments for a boolean option

    @param params          Dictionary with the parameters
    @param command_option  The option as it appears on the command line
    @param param           Key of the boolean in params
    @param true_value      Text emitted when the parameter is True
    @param false_value     Text emitted when the parameter is False
    @param separator       If given, option and value are joined with it into
                           a single argument
    @returns               List of arguments; empty if the parameter is
                           missing or None
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    value = true_value if flag else false_value
    if not separator:
        return [command_option, value]
    return [command_option + separator + value]
4949
4950
def cli_valueless_option(params, command_option, param, expected_value=True):
    """ Build the command line arguments for an option that takes no value

    @returns [command_option] if the parameter equals expected_value,
             otherwise []
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4954
4955
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """ Pick the extra command line arguments configured for the given keys

    @param argdict     Dictionary mapping lower-case keys to lists of
                       arguments. A plain list/tuple is accepted too: it is
                       returned unchanged if use_compat is true and ignored
                       otherwise
    @param keys        List/tuple of keys in order of preference; an entry
                       may itself be a group of keys whose argument lists
                       are concatenated
    @param default     Arguments to use when nothing is configured
    @param use_compat  Whether a list/tuple passed as argdict is honoured
    @returns           The arguments of the first entry of keys that has
                       any configured, else the default
    """
    def fallback():
        # Hand out a copy: the result is typically extended by the caller,
        # which must not alter the default list shared between calls
        return list(default) if isinstance(default, list) else default

    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return fallback()
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        arg_list = [
            args for args in (argdict.get(key.lower()) for key in variadic(key_list))
            if args is not None]
        if arg_list:
            return [arg for args in arg_list for arg in args]
    return fallback()
4974
4975
class ISO639Utils(object):
    """ Conversion between two-letter and three-letter language codes """

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked up; None is
        returned for unknown codes
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter code of the table that maps to code,
        or None if there is none
        """
        return next(
            (two_letter for two_letter, three_letter in cls._lang_map.items()
             if three_letter == code),
            None)
5179
5180
class ISO3166Utils(object):
    """ Lookup of country names by two-letter country code """

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-1 alpha-2 country code to the corresponding full name

        The lookup is case-insensitive; None is returned for unknown codes
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
5439
5440
5441 class GeoUtils(object):
5442     # Major IPv4 address blocks per country
5443     _country_ip_map = {
5444         'AD': '46.172.224.0/19',
5445         'AE': '94.200.0.0/13',
5446         'AF': '149.54.0.0/17',
5447         'AG': '209.59.64.0/18',
5448         'AI': '204.14.248.0/21',
5449         'AL': '46.99.0.0/16',
5450         'AM': '46.70.0.0/15',
5451         'AO': '105.168.0.0/13',
5452         'AP': '182.50.184.0/21',
5453         'AQ': '23.154.160.0/24',
5454         'AR': '181.0.0.0/12',
5455         'AS': '202.70.112.0/20',
5456         'AT': '77.116.0.0/14',
5457         'AU': '1.128.0.0/11',
5458         'AW': '181.41.0.0/18',
5459         'AX': '185.217.4.0/22',
5460         'AZ': '5.197.0.0/16',
5461         'BA': '31.176.128.0/17',
5462         'BB': '65.48.128.0/17',
5463         'BD': '114.130.0.0/16',
5464         'BE': '57.0.0.0/8',
5465         'BF': '102.178.0.0/15',
5466         'BG': '95.42.0.0/15',
5467         'BH': '37.131.0.0/17',
5468         'BI': '154.117.192.0/18',
5469         'BJ': '137.255.0.0/16',
5470         'BL': '185.212.72.0/23',
5471         'BM': '196.12.64.0/18',
5472         'BN': '156.31.0.0/16',
5473         'BO': '161.56.0.0/16',
5474         'BQ': '161.0.80.0/20',
5475         'BR': '191.128.0.0/12',
5476         'BS': '24.51.64.0/18',
5477         'BT': '119.2.96.0/19',
5478         'BW': '168.167.0.0/16',
5479         'BY': '178.120.0.0/13',
5480         'BZ': '179.42.192.0/18',
5481         'CA': '99.224.0.0/11',
5482         'CD': '41.243.0.0/16',
5483         'CF': '197.242.176.0/21',
5484         'CG': '160.113.0.0/16',
5485         'CH': '85.0.0.0/13',
5486         'CI': '102.136.0.0/14',
5487         'CK': '202.65.32.0/19',
5488         'CL': '152.172.0.0/14',
5489         'CM': '102.244.0.0/14',
5490         'CN': '36.128.0.0/10',
5491         'CO': '181.240.0.0/12',
5492         'CR': '201.192.0.0/12',
5493         'CU': '152.206.0.0/15',
5494         'CV': '165.90.96.0/19',
5495         'CW': '190.88.128.0/17',
5496         'CY': '31.153.0.0/16',
5497         'CZ': '88.100.0.0/14',
5498         'DE': '53.0.0.0/8',
5499         'DJ': '197.241.0.0/17',
5500         'DK': '87.48.0.0/12',
5501         'DM': '192.243.48.0/20',
5502         'DO': '152.166.0.0/15',
5503         'DZ': '41.96.0.0/12',
5504         'EC': '186.68.0.0/15',
5505         'EE': '90.190.0.0/15',
5506         'EG': '156.160.0.0/11',
5507         'ER': '196.200.96.0/20',
5508         'ES': '88.0.0.0/11',
5509         'ET': '196.188.0.0/14',
5510         'EU': '2.16.0.0/13',
5511         'FI': '91.152.0.0/13',
5512         'FJ': '144.120.0.0/16',
5513         'FK': '80.73.208.0/21',
5514         'FM': '119.252.112.0/20',
5515         'FO': '88.85.32.0/19',
5516         'FR': '90.0.0.0/9',
5517         'GA': '41.158.0.0/15',
5518         'GB': '25.0.0.0/8',
5519         'GD': '74.122.88.0/21',
5520         'GE': '31.146.0.0/16',
5521         'GF': '161.22.64.0/18',
5522         'GG': '62.68.160.0/19',
5523         'GH': '154.160.0.0/12',
5524         'GI': '95.164.0.0/16',
5525         'GL': '88.83.0.0/19',
5526         'GM': '160.182.0.0/15',
5527         'GN': '197.149.192.0/18',
5528         'GP': '104.250.0.0/19',
5529         'GQ': '105.235.224.0/20',
5530         'GR': '94.64.0.0/13',
5531         'GT': '168.234.0.0/16',
5532         'GU': '168.123.0.0/16',
5533         'GW': '197.214.80.0/20',
5534         'GY': '181.41.64.0/18',
5535         'HK': '113.252.0.0/14',
5536         'HN': '181.210.0.0/16',
5537         'HR': '93.136.0.0/13',
5538         'HT': '148.102.128.0/17',
5539         'HU': '84.0.0.0/14',
5540         'ID': '39.192.0.0/10',
5541         'IE': '87.32.0.0/12',
5542         'IL': '79.176.0.0/13',
5543         'IM': '5.62.80.0/20',
5544         'IN': '117.192.0.0/10',
5545         'IO': '203.83.48.0/21',
5546         'IQ': '37.236.0.0/14',
5547         'IR': '2.176.0.0/12',
5548         'IS': '82.221.0.0/16',
5549         'IT': '79.0.0.0/10',
5550         'JE': '87.244.64.0/18',
5551         'JM': '72.27.0.0/17',
5552         'JO': '176.29.0.0/16',
5553         'JP': '133.0.0.0/8',
5554         'KE': '105.48.0.0/12',
5555         'KG': '158.181.128.0/17',
5556         'KH': '36.37.128.0/17',
5557         'KI': '103.25.140.0/22',
5558         'KM': '197.255.224.0/20',
5559         'KN': '198.167.192.0/19',
5560         'KP': '175.45.176.0/22',
5561         'KR': '175.192.0.0/10',
5562         'KW': '37.36.0.0/14',
5563         'KY': '64.96.0.0/15',
5564         'KZ': '2.72.0.0/13',
5565         'LA': '115.84.64.0/18',
5566         'LB': '178.135.0.0/16',
5567         'LC': '24.92.144.0/20',
5568         'LI': '82.117.0.0/19',
5569         'LK': '112.134.0.0/15',
5570         'LR': '102.183.0.0/16',
5571         'LS': '129.232.0.0/17',
5572         'LT': '78.56.0.0/13',
5573         'LU': '188.42.0.0/16',
5574         'LV': '46.109.0.0/16',
5575         'LY': '41.252.0.0/14',
5576         'MA': '105.128.0.0/11',
5577         'MC': '88.209.64.0/18',
5578         'MD': '37.246.0.0/16',
5579         'ME': '178.175.0.0/17',
5580         'MF': '74.112.232.0/21',
5581         'MG': '154.126.0.0/17',
5582         'MH': '117.103.88.0/21',
5583         'MK': '77.28.0.0/15',
5584         'ML': '154.118.128.0/18',
5585         'MM': '37.111.0.0/17',
5586         'MN': '49.0.128.0/17',
5587         'MO': '60.246.0.0/16',
5588         'MP': '202.88.64.0/20',
5589         'MQ': '109.203.224.0/19',
5590         'MR': '41.188.64.0/18',
5591         'MS': '208.90.112.0/22',
5592         'MT': '46.11.0.0/16',
5593         'MU': '105.16.0.0/12',
5594         'MV': '27.114.128.0/18',
5595         'MW': '102.70.0.0/15',
5596         'MX': '187.192.0.0/11',
5597         'MY': '175.136.0.0/13',
5598         'MZ': '197.218.0.0/15',
5599         'NA': '41.182.0.0/16',
5600         'NC': '101.101.0.0/18',
5601         'NE': '197.214.0.0/18',
5602         'NF': '203.17.240.0/22',
5603         'NG': '105.112.0.0/12',
5604         'NI': '186.76.0.0/15',
5605         'NL': '145.96.0.0/11',
5606         'NO': '84.208.0.0/13',
5607         'NP': '36.252.0.0/15',
5608         'NR': '203.98.224.0/19',
5609         'NU': '49.156.48.0/22',
5610         'NZ': '49.224.0.0/14',
5611         'OM': '5.36.0.0/15',
5612         'PA': '186.72.0.0/15',
5613         'PE': '186.160.0.0/14',
5614         'PF': '123.50.64.0/18',
5615         'PG': '124.240.192.0/19',
5616         'PH': '49.144.0.0/13',
5617         'PK': '39.32.0.0/11',
5618         'PL': '83.0.0.0/11',
5619         'PM': '70.36.0.0/20',
5620         'PR': '66.50.0.0/16',
5621         'PS': '188.161.0.0/16',
5622         'PT': '85.240.0.0/13',
5623         'PW': '202.124.224.0/20',
5624         'PY': '181.120.0.0/14',
5625         'QA': '37.210.0.0/15',
5626         'RE': '102.35.0.0/16',
5627         'RO': '79.112.0.0/13',
5628         'RS': '93.86.0.0/15',
5629         'RU': '5.136.0.0/13',
5630         'RW': '41.186.0.0/16',
5631         'SA': '188.48.0.0/13',
5632         'SB': '202.1.160.0/19',
5633         'SC': '154.192.0.0/11',
5634         'SD': '102.120.0.0/13',
5635         'SE': '78.64.0.0/12',
5636         'SG': '8.128.0.0/10',
5637         'SI': '188.196.0.0/14',
5638         'SK': '78.98.0.0/15',
5639         'SL': '102.143.0.0/17',
5640         'SM': '89.186.32.0/19',
5641         'SN': '41.82.0.0/15',
5642         'SO': '154.115.192.0/18',
5643         'SR': '186.179.128.0/17',
5644         'SS': '105.235.208.0/21',
5645         'ST': '197.159.160.0/19',
5646         'SV': '168.243.0.0/16',
5647         'SX': '190.102.0.0/20',
5648         'SY': '5.0.0.0/16',
5649         'SZ': '41.84.224.0/19',
5650         'TC': '65.255.48.0/20',
5651         'TD': '154.68.128.0/19',
5652         'TG': '196.168.0.0/14',
5653         'TH': '171.96.0.0/13',
5654         'TJ': '85.9.128.0/18',
5655         'TK': '27.96.24.0/21',
5656         'TL': '180.189.160.0/20',
5657         'TM': '95.85.96.0/19',
5658         'TN': '197.0.0.0/11',
5659         'TO': '175.176.144.0/21',
5660         'TR': '78.160.0.0/11',
5661         'TT': '186.44.0.0/15',
5662         'TV': '202.2.96.0/19',
5663         'TW': '120.96.0.0/11',
5664         'TZ': '156.156.0.0/14',
5665         'UA': '37.52.0.0/14',
5666         'UG': '102.80.0.0/13',
5667         'US': '6.0.0.0/8',
5668         'UY': '167.56.0.0/13',
5669         'UZ': '84.54.64.0/18',
5670         'VA': '212.77.0.0/19',
5671         'VC': '207.191.240.0/21',
5672         'VE': '186.88.0.0/13',
5673         'VG': '66.81.192.0/20',
5674         'VI': '146.226.0.0/16',
5675         'VN': '14.160.0.0/11',
5676         'VU': '202.80.32.0/20',
5677         'WF': '117.20.32.0/21',
5678         'WS': '202.4.32.0/19',
5679         'YE': '134.35.0.0/16',
5680         'YT': '41.242.116.0/22',
5681         'ZA': '41.0.0.0/11',
5682         'ZM': '102.144.0.0/13',
5683         'ZW': '102.177.192.0/18',
5684     }
5685
5686     @classmethod
5687     def random_ipv4(cls, code_or_block):
5688         if len(code_or_block) == 2:
5689             block = cls._country_ip_map.get(code_or_block.upper())
5690             if not block:
5691                 return None
5692         else:
5693             block = code_or_block
5694         addr, preflen = block.split('/')
5695         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5696         addr_max = addr_min | (0xffffffff >> int(preflen))
5697         return compat_str(socket.inet_ntoa(
5698             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5699
5700
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets an individual request choose its proxy.

    A request may carry a 'Ytdl-request-proxy' header; its value replaces
    the proxy configured for the handler and the header is removed before
    the request is processed further.
    """

    def __init__(self, proxies=None):
        # Install http/https openers that default to "no proxy", so that
        # proxy_open() runs even for schemes without a configured proxy.
        # The base initializer runs afterwards with the given proxies.
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy takes precedence over the handler-wide one
        per_request_proxy = req.headers.get('Ytdl-request-proxy')
        if per_request_proxy is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = per_request_proxy

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # Only mark the request here: yt-dlp's http/https handlers
            # take care of wrapping the socket with socks
            req.add_header('Ytdl-socks-proxy', proxy)
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5724
5725
5726 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5727 # released into Public Domain
5728 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5729
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.

    Zero and negative numbers both produce a single zero byte (before
    padding); this is kept for backward compatibility.
    """
    n = int(n)
    if n > 0:
        # Minimal big-endian representation, i.e. without leading zero bytes
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    else:
        s = b'\000'
    # Left-pad with zero bytes up to the next multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5758
5759
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes(). Leading zero
    bytes do not change the result and an empty string yields 0.
    """
    return int.from_bytes(s, 'big')
5775
5776
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt one block with OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The input bytes are reversed before being read as a hexadecimal number,
    so the first byte of `data` becomes the least significant one.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext_number = int(reversed_hex, 16)
    ciphertext_number = pow(plaintext_number, exponent, modulus)
    return '%x' % ciphertext_number
5792
5793
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2, <padding>, 0, <data>]. The padding
    octets must all be non-zero, because the first zero octet after the
    leading [0, 2] marks where the data begins.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves room for fewer than 8 padding octets
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Draw from 1..255: a zero here would be taken for the separator
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + list(data)
5807
5808
def encode_base_n(num, n, table=None):
    """Convert an integer to its string representation in base n

    @param num      Integer to convert. Negative numbers are encoded as
                    '-' followed by the encoding of their absolute value
    @param n        The base
    @param table    Digits to use; defaults to the first n characters of
                    0-9, a-z, A-Z (so at most base 62 without a table)
    @returns        The encoded string
    @raises ValueError  if the table has fewer than n digits, or if the
                        base is below 2 and num is not zero
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # With a base below 2 the division below would never reach zero
    if n < 2:
        raise ValueError('base %d is too small to encode a non-zero number' % n)

    # Floor division of a negative number gets stuck at -1, so encode the
    # magnitude and put the sign in front
    sign = ''
    if num < 0:
        sign, num = '-', -num

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return sign + ''.join(reversed(digits))
5825
5826
def decode_packed_codes(code):
    """Expand code that was packed with a symbol table

    The first match of PACKED_CODES_RE in `code` supplies the obfuscated
    code, the base, the number of symbols and the '|'-separated symbols.
    Every word of the obfuscated code is then replaced by the symbol whose
    index, written in that base, equals the word; an empty symbol means
    the word stands for itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    base, remaining = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    while remaining:
        remaining -= 1
        token = encode_base_n(remaining, base)
        symbol_table[token] = symbols[remaining] or token

    def _lookup(word):
        return symbol_table[word.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfuscated_code)
5843
5844
def caesar(s, alphabet, shift):
    """Apply a Caesar cipher to s

    Every character of s that occurs in alphabet is replaced by the one
    `shift` positions further along (wrapping around); all other
    characters are kept. With a shift of 0, s is returned as is.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for char in s:
        if char in alphabet:
            char = alphabet[(alphabet.index(char) + shift) % size]
        shifted.append(char)
    return ''.join(shifted)
5852
5853
def rot47(s):
    """Rotate the printable ASCII characters ('!' to '~') of s by 47 places"""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5856
5857
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=VALUE attribute list into a dict

    Values may be enclosed in double quotes, in which case they may contain
    commas; the surrounding quotes are removed. All values are returned as
    strings and a repeated key keeps its last value.
    """
    pairs = re.findall(
        r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        name: raw[1:-1] if raw.startswith('"') else raw
        for name, raw in pairs
    }
5865
5866
def urshift(val, n):
    """Shift val right by n bits, treating a negative val as unsigned 32-bit"""
    if val < 0:
        # Two's complement: map the negative value into 0 .. 2**32 - 1
        val += 0x100000000
    return val >> n
5869
5870
5871 # Based on png2str() written by @gdkchan and improved by @yokrysty
5872 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode the image data of a PNG file into rows of byte values

    @param png_data     Contents of the PNG file as bytes
    @returns            Tuple (width, height, pixels) where pixels is a list
                        of `height` rows and every row is a flat list of
                        `width * 3` integers
    @raises IOError     if the PNG signature or the leading IHDR chunk is
                        missing, or if there is no IDAT data

    NOTE(review): the row stride is hard-coded to 3 bytes per pixel and the
    bit depth, colour type and interlace fields of IHDR are never read, so
    this presumably only handles 8-bit non-interlaced RGB images - confirm
    against the callers. Chunk CRCs are skipped, not verified.
    """
    # Reference: https://www.w3.org/TR/PNG/
    # Everything after the 8-byte signature is a sequence of chunks
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned integers of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk is: 4-byte length, 4-byte type, `length` bytes of data, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The check above guarantees that the first chunk is IHDR
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The compressed image may be split over several IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Number of data bytes per row (3 bytes per pixel)
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Look up an already reconstructed byte by its flat index
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Every row is preceded by one byte holding its filter type
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before being filled so that _get_pixel can already read
        # the bytes to the left within the current row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbours outside the image count as 0
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the filter; filter type 0 leaves the byte unchanged
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # c is the upper-left neighbour
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use whichever of a, b, c is closest to p (ties: a, then b)
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5976
5977
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file `path` to `value`

    The python `xattr` module (either pyxattr or xattr) is used when it can
    be imported. Otherwise the value is written to an NTFS Alternate Data
    Stream when compat_os_name is 'nt', or set with the `setfattr` or
    `xattr` command line tools elsewhere.

    @param value    Bytes; decoded as UTF-8 when passed to a command line tool
    @raises XAttrUnavailableError   if pyxattr is too old or no tool is found
    @raises XAttrMetadataError      if setting the attribute fails
    """
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))
            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        has_setfattr = check_executable('setfattr', ['--version'])
        has_xattr = check_executable('xattr', ['-h'])

        if not (has_setfattr or has_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        value = value.decode('utf-8')
        # setfattr is preferred when both tools are present
        if has_setfattr:
            executable, opts = 'setfattr', ['-n', key, '-v', value]
        else:
            executable, opts = 'xattr', ['-w', key, value]

        cmd = [encodeFilename(executable, True)]
        cmd.extend([encodeArgument(o) for o in opts])
        cmd.append(encodeFilename(path, True))

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        stdout, stderr = process_communicate_or_kill(p)
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
6060
6061
def random_birthday(year_field, month_field, day_field):
    """Generate a random date of birth between 1950-01-01 and 1995-12-31

    @returns    A dict that maps the three given field names to the year,
                month and day of the date, each as a string
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(days=random.randint(0, span_days))
    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict(zip(field_names, map(str, date_parts)))
6072
6073
# Templates for internet shortcut files, which are plain text files.
# Each template contains %(name)s placeholders to be filled in with the `%`
# operator and a mapping. The literals start on the line after the opening
# quotes for readability; .lstrip() removes that leading newline again.

# [InternetShortcut] format (judging by the name, for `.url` files);
# expects `url`
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# Apple property list (judging by the name, for `.webloc` files);
# expects `url`
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# [Desktop Entry] of type Link (judging by the name, for `.desktop` files);
# expects `filename` and `url`
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6099
6100
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    IRIs without a hostname (e.g. relative references or `file:///...`) are supported; only their remaining components are escaped. An explicit port is kept, except for port 80 on `http`, where it is the default.

    Raises ValueError for IPv6 hosts, which are not supported yet.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `.hostname` is None when the IRI has no network location
    hostname = iri_parts.hostname
    if hostname:
        net_location += hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 is redundant for http only; dropping it for any other scheme
    # would silently redirect the request to that scheme's default port
    port = iri_parts.port
    if port is not None and not (port == 80 and iri_parts.scheme == 'http'):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6143
6144
def to_high_limit_path(path):
    r"""Return a form of path that is not subject to MAX_PATH on Windows

    On win32 and cygwin the path is made absolute and prefixed with `\\?\`;
    on every other platform it is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    # (A raw string cannot end in a backslash, hence the space and rstrip)
    long_path_prefix = r'\\?\ '.rstrip()
    return long_path_prefix + os.path.abspath(path)
6151
6152
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Format a single field of obj, falling back to a default

    @param obj          Mapping to read the field from
    @param field        Key of the field
    @param template     %-format string that the value is inserted into
    @param ignore       Values that are treated as missing
    @param default      Returned when the value is missing; also used as
                        the value when obj has no such key
    @param func         Optional function applied to a non-missing value
                        before it is formatted
    """
    value = obj.get(field, default)
    if func and value not in ignore:
        value = func(value)
    # func may itself have produced a value that is to be ignored
    if value in ignore:
        return default
    return template % value
6158
6159
def clean_podcast_url(url):
    """Remove the known tracking/analytics prefixes from a podcast URL"""
    tracking_prefix = re.compile(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''')
    return tracking_prefix.sub('', url)
6175
6176
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Generate a random (version 4) UUID in its canonical text form

    In the template, `x` stands for any hexadecimal digit while `y` holds
    the variant bits and therefore has to be one of 8, 9, a or b.
    """
    def _random_digit(mobj):
        if mobj.group(0) == 'y':
            # The two most significant bits of this digit are fixed to 10
            return _HEX_TABLE[random.randint(8, 11)]
        return _HEX_TABLE[random.randint(0, 15)]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6182
6183
def make_dir(path, to_screen=None):
    """Create the directory that is to contain `path`, if it is missing

    @param path         Path of a file; only its directory part is created
    @param to_screen    Optional callable that is given an error message
                        when the directory cannot be created
    @returns            True if the directory exists afterwards (or path
                        has no directory part), False if creating it failed
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # callable() returns a bool, so it has to be tested directly:
        # comparing it with None is always true and would call `None`
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6194
6195
def get_executable_path():
    """Return the absolute path of the directory the program is run from

    This is the directory of the executable for a frozen build, and
    otherwise a parent directory of this module: two levels up when the
    module is loaded from a ZIP file, one level up in all other cases.
    """
    from zipimport import zipimporter

    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        return os.path.abspath(os.path.dirname(sys.executable))

    if isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
        levels_up = '../..'
    else:
        levels_up = '..'
    return os.path.abspath(os.path.join(os.path.dirname(__file__), levels_up))
6205
6206
def load_plugins(name, suffix, namespace):
    """Load plugin classes from the `ytdlp_plugins` directory

    The module `name` is looked up in the `ytdlp_plugins` directory under
    get_executable_path(). Each of its attributes whose name ends with
    `suffix` and is not yet in `namespace` is added to `namespace`.

    @returns    List of the objects that were added; it is empty when the
                module cannot be imported
    """
    plugin_info = [None]
    classes = []
    try:
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        for attr in dir(plugins):
            if attr in namespace or not attr.endswith(suffix):
                continue
            klass = getattr(plugins, attr)
            classes.append(klass)
            namespace[attr] = klass
    except ImportError:
        pass
    finally:
        # The first item returned by find_module is an open file (or None)
        module_file = plugin_info[0]
        if module_file is not None:
            module_file.close()
    return classes
6228
6229
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    @returns                The value found by the first path that yields one. For a
                            path that branches (tuple key or "..."), this is the
                            flattened list of all values, or only its first item
                            if get_all is False
    # TODO: Write tests
    '''
    if not casesense:
        _lower = lambda k: k.lower() if isinstance(k, str) else k
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` records the deepest level of branching seen for the
        # current path, i.e. how deeply the returned lists are nested
        nonlocal depth
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # Follow each alternative, then branch over the results
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict):
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # A bare ":" selects everything, just like "...".
                        # The current depth has to be passed on, or the
                        # branching would be counted from zero again and
                        # the result would not be flattened completely
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # One level of nesting is the result list itself; every
                # further level of branching is flattened away
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6310
6311
def traverse_dict(dictn, keys, casesense=True):
    ''' Legacy wrapper around traverse_obj, kept for backward compatibility. Do not use '''
    legacy_options = {
        'casesense': casesense,
        'is_user_input': True,
        'traverse_string': True,
    }
    return traverse_obj(dictn, keys, **legacy_options)
6316
6317
def variadic(x, allowed_types=(str, bytes)):
    """Return x if it is an iterable, or else a 1-tuple containing x

    Instances of allowed_types are wrapped in a tuple as well, even
    though they can be iterated over.
    """
    if not isinstance(x, collections.abc.Iterable) or isinstance(x, allowed_types):
        return (x,)
    return x