yt_dlp/utils.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import imp
  20 import io
  21 import itertools
  22 import json
  23 import locale
  24 import math
  25 import operator
  26 import os
  27 import platform
  28 import random
  29 import re
  30 import socket
  31 import ssl
  32 import subprocess
  33 import sys
  34 import tempfile
  35 import time
  36 import traceback
  37 import xml.etree.ElementTree
  38 import zlib
  39
  40 from .compat import (
  41     compat_HTMLParseError,
  42     compat_HTMLParser,
  43     compat_HTTPError,
  44     compat_basestring,
  45     compat_chr,
  46     compat_cookiejar,
  47     compat_ctypes_WINFUNCTYPE,
  48     compat_etree_fromstring,
  49     compat_expanduser,
  50     compat_html_entities,
  51     compat_html_entities_html5,
  52     compat_http_client,
  53     compat_integer_types,
  54     compat_numeric_types,
  55     compat_kwargs,
  56     compat_os_name,
  57     compat_parse_qs,
  58     compat_shlex_quote,
  59     compat_str,
  60     compat_struct_pack,
  61     compat_struct_unpack,
  62     compat_urllib_error,
  63     compat_urllib_parse,
  64     compat_urllib_parse_urlencode,
  65     compat_urllib_parse_urlparse,
  66     compat_urllib_parse_urlunparse,
  67     compat_urllib_parse_quote,
  68     compat_urllib_parse_quote_plus,
  69     compat_urllib_parse_unquote_plus,
  70     compat_urllib_request,
  71     compat_urlparse,
  72     compat_xpath,
  73 )
  74
  75 from .socks import (
  76     ProxyType,
  77     sockssocket,
  78 )
  79
  80
  81 def register_socks_protocols():
  82     # "Register" SOCKS protocols
  83     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  84     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  85     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  86         if scheme not in compat_urlparse.uses_netloc:
  87             compat_urlparse.uses_netloc.append(scheme)
  88
  89
  90 # This is not clearly defined otherwise
  91 compiled_regex_type = type(re.compile(''))
  92
  93
  94 def random_user_agent():
  95     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  96     _CHROME_VERSIONS = (
  97         '74.0.3729.129',
  98         '76.0.3780.3',
  99         '76.0.3780.2',
 100         '74.0.3729.128',
 101         '76.0.3780.1',
 102         '76.0.3780.0',
 103         '75.0.3770.15',
 104         '74.0.3729.127',
 105         '74.0.3729.126',
 106         '76.0.3779.1',
 107         '76.0.3779.0',
 108         '75.0.3770.14',
 109         '74.0.3729.125',
 110         '76.0.3778.1',
 111         '76.0.3778.0',
 112         '75.0.3770.13',
 113         '74.0.3729.124',
 114         '74.0.3729.123',
 115         '73.0.3683.121',
 116         '76.0.3777.1',
 117         '76.0.3777.0',
 118         '75.0.3770.12',
 119         '74.0.3729.122',
 120         '76.0.3776.4',
 121         '75.0.3770.11',
 122         '74.0.3729.121',
 123         '76.0.3776.3',
 124         '76.0.3776.2',
 125         '73.0.3683.120',
 126         '74.0.3729.120',
 127         '74.0.3729.119',
 128         '74.0.3729.118',
 129         '76.0.3776.1',
 130         '76.0.3776.0',
 131         '76.0.3775.5',
 132         '75.0.3770.10',
 133         '74.0.3729.117',
 134         '76.0.3775.4',
 135         '76.0.3775.3',
 136         '74.0.3729.116',
 137         '75.0.3770.9',
 138         '76.0.3775.2',
 139         '76.0.3775.1',
 140         '76.0.3775.0',
 141         '75.0.3770.8',
 142         '74.0.3729.115',
 143         '74.0.3729.114',
 144         '76.0.3774.1',
 145         '76.0.3774.0',
 146         '75.0.3770.7',
 147         '74.0.3729.113',
 148         '74.0.3729.112',
 149         '74.0.3729.111',
 150         '76.0.3773.1',
 151         '76.0.3773.0',
 152         '75.0.3770.6',
 153         '74.0.3729.110',
 154         '74.0.3729.109',
 155         '76.0.3772.1',
 156         '76.0.3772.0',
 157         '75.0.3770.5',
 158         '74.0.3729.108',
 159         '74.0.3729.107',
 160         '76.0.3771.1',
 161         '76.0.3771.0',
 162         '75.0.3770.4',
 163         '74.0.3729.106',
 164         '74.0.3729.105',
 165         '75.0.3770.3',
 166         '74.0.3729.104',
 167         '74.0.3729.103',
 168         '74.0.3729.102',
 169         '75.0.3770.2',
 170         '74.0.3729.101',
 171         '75.0.3770.1',
 172         '75.0.3770.0',
 173         '74.0.3729.100',
 174         '75.0.3769.5',
 175         '75.0.3769.4',
 176         '74.0.3729.99',
 177         '75.0.3769.3',
 178         '75.0.3769.2',
 179         '75.0.3768.6',
 180         '74.0.3729.98',
 181         '75.0.3769.1',
 182         '75.0.3769.0',
 183         '74.0.3729.97',
 184         '73.0.3683.119',
 185         '73.0.3683.118',
 186         '74.0.3729.96',
 187         '75.0.3768.5',
 188         '75.0.3768.4',
 189         '75.0.3768.3',
 190         '75.0.3768.2',
 191         '74.0.3729.95',
 192         '74.0.3729.94',
 193         '75.0.3768.1',
 194         '75.0.3768.0',
 195         '74.0.3729.93',
 196         '74.0.3729.92',
 197         '73.0.3683.117',
 198         '74.0.3729.91',
 199         '75.0.3766.3',
 200         '74.0.3729.90',
 201         '75.0.3767.2',
 202         '75.0.3767.1',
 203         '75.0.3767.0',
 204         '74.0.3729.89',
 205         '73.0.3683.116',
 206         '75.0.3766.2',
 207         '74.0.3729.88',
 208         '75.0.3766.1',
 209         '75.0.3766.0',
 210         '74.0.3729.87',
 211         '73.0.3683.115',
 212         '74.0.3729.86',
 213         '75.0.3765.1',
 214         '75.0.3765.0',
 215         '74.0.3729.85',
 216         '73.0.3683.114',
 217         '74.0.3729.84',
 218         '75.0.3764.1',
 219         '75.0.3764.0',
 220         '74.0.3729.83',
 221         '73.0.3683.113',
 222         '75.0.3763.2',
 223         '75.0.3761.4',
 224         '74.0.3729.82',
 225         '75.0.3763.1',
 226         '75.0.3763.0',
 227         '74.0.3729.81',
 228         '73.0.3683.112',
 229         '75.0.3762.1',
 230         '75.0.3762.0',
 231         '74.0.3729.80',
 232         '75.0.3761.3',
 233         '74.0.3729.79',
 234         '73.0.3683.111',
 235         '75.0.3761.2',
 236         '74.0.3729.78',
 237         '74.0.3729.77',
 238         '75.0.3761.1',
 239         '75.0.3761.0',
 240         '73.0.3683.110',
 241         '74.0.3729.76',
 242         '74.0.3729.75',
 243         '75.0.3760.0',
 244         '74.0.3729.74',
 245         '75.0.3759.8',
 246         '75.0.3759.7',
 247         '75.0.3759.6',
 248         '74.0.3729.73',
 249         '75.0.3759.5',
 250         '74.0.3729.72',
 251         '73.0.3683.109',
 252         '75.0.3759.4',
 253         '75.0.3759.3',
 254         '74.0.3729.71',
 255         '75.0.3759.2',
 256         '74.0.3729.70',
 257         '73.0.3683.108',
 258         '74.0.3729.69',
 259         '75.0.3759.1',
 260         '75.0.3759.0',
 261         '74.0.3729.68',
 262         '73.0.3683.107',
 263         '74.0.3729.67',
 264         '75.0.3758.1',
 265         '75.0.3758.0',
 266         '74.0.3729.66',
 267         '73.0.3683.106',
 268         '74.0.3729.65',
 269         '75.0.3757.1',
 270         '75.0.3757.0',
 271         '74.0.3729.64',
 272         '73.0.3683.105',
 273         '74.0.3729.63',
 274         '75.0.3756.1',
 275         '75.0.3756.0',
 276         '74.0.3729.62',
 277         '73.0.3683.104',
 278         '75.0.3755.3',
 279         '75.0.3755.2',
 280         '73.0.3683.103',
 281         '75.0.3755.1',
 282         '75.0.3755.0',
 283         '74.0.3729.61',
 284         '73.0.3683.102',
 285         '74.0.3729.60',
 286         '75.0.3754.2',
 287         '74.0.3729.59',
 288         '75.0.3753.4',
 289         '74.0.3729.58',
 290         '75.0.3754.1',
 291         '75.0.3754.0',
 292         '74.0.3729.57',
 293         '73.0.3683.101',
 294         '75.0.3753.3',
 295         '75.0.3752.2',
 296         '75.0.3753.2',
 297         '74.0.3729.56',
 298         '75.0.3753.1',
 299         '75.0.3753.0',
 300         '74.0.3729.55',
 301         '73.0.3683.100',
 302         '74.0.3729.54',
 303         '75.0.3752.1',
 304         '75.0.3752.0',
 305         '74.0.3729.53',
 306         '73.0.3683.99',
 307         '74.0.3729.52',
 308         '75.0.3751.1',
 309         '75.0.3751.0',
 310         '74.0.3729.51',
 311         '73.0.3683.98',
 312         '74.0.3729.50',
 313         '75.0.3750.0',
 314         '74.0.3729.49',
 315         '74.0.3729.48',
 316         '74.0.3729.47',
 317         '75.0.3749.3',
 318         '74.0.3729.46',
 319         '73.0.3683.97',
 320         '75.0.3749.2',
 321         '74.0.3729.45',
 322         '75.0.3749.1',
 323         '75.0.3749.0',
 324         '74.0.3729.44',
 325         '73.0.3683.96',
 326         '74.0.3729.43',
 327         '74.0.3729.42',
 328         '75.0.3748.1',
 329         '75.0.3748.0',
 330         '74.0.3729.41',
 331         '75.0.3747.1',
 332         '73.0.3683.95',
 333         '75.0.3746.4',
 334         '74.0.3729.40',
 335         '74.0.3729.39',
 336         '75.0.3747.0',
 337         '75.0.3746.3',
 338         '75.0.3746.2',
 339         '74.0.3729.38',
 340         '75.0.3746.1',
 341         '75.0.3746.0',
 342         '74.0.3729.37',
 343         '73.0.3683.94',
 344         '75.0.3745.5',
 345         '75.0.3745.4',
 346         '75.0.3745.3',
 347         '75.0.3745.2',
 348         '74.0.3729.36',
 349         '75.0.3745.1',
 350         '75.0.3745.0',
 351         '75.0.3744.2',
 352         '74.0.3729.35',
 353         '73.0.3683.93',
 354         '74.0.3729.34',
 355         '75.0.3744.1',
 356         '75.0.3744.0',
 357         '74.0.3729.33',
 358         '73.0.3683.92',
 359         '74.0.3729.32',
 360         '74.0.3729.31',
 361         '73.0.3683.91',
 362         '75.0.3741.2',
 363         '75.0.3740.5',
 364         '74.0.3729.30',
 365         '75.0.3741.1',
 366         '75.0.3741.0',
 367         '74.0.3729.29',
 368         '75.0.3740.4',
 369         '73.0.3683.90',
 370         '74.0.3729.28',
 371         '75.0.3740.3',
 372         '73.0.3683.89',
 373         '75.0.3740.2',
 374         '74.0.3729.27',
 375         '75.0.3740.1',
 376         '75.0.3740.0',
 377         '74.0.3729.26',
 378         '73.0.3683.88',
 379         '73.0.3683.87',
 380         '74.0.3729.25',
 381         '75.0.3739.1',
 382         '75.0.3739.0',
 383         '73.0.3683.86',
 384         '74.0.3729.24',
 385         '73.0.3683.85',
 386         '75.0.3738.4',
 387         '75.0.3738.3',
 388         '75.0.3738.2',
 389         '75.0.3738.1',
 390         '75.0.3738.0',
 391         '74.0.3729.23',
 392         '73.0.3683.84',
 393         '74.0.3729.22',
 394         '74.0.3729.21',
 395         '75.0.3737.1',
 396         '75.0.3737.0',
 397         '74.0.3729.20',
 398         '73.0.3683.83',
 399         '74.0.3729.19',
 400         '75.0.3736.1',
 401         '75.0.3736.0',
 402         '74.0.3729.18',
 403         '73.0.3683.82',
 404         '74.0.3729.17',
 405         '75.0.3735.1',
 406         '75.0.3735.0',
 407         '74.0.3729.16',
 408         '73.0.3683.81',
 409         '75.0.3734.1',
 410         '75.0.3734.0',
 411         '74.0.3729.15',
 412         '73.0.3683.80',
 413         '74.0.3729.14',
 414         '75.0.3733.1',
 415         '75.0.3733.0',
 416         '75.0.3732.1',
 417         '74.0.3729.13',
 418         '74.0.3729.12',
 419         '73.0.3683.79',
 420         '74.0.3729.11',
 421         '75.0.3732.0',
 422         '74.0.3729.10',
 423         '73.0.3683.78',
 424         '74.0.3729.9',
 425         '74.0.3729.8',
 426         '74.0.3729.7',
 427         '75.0.3731.3',
 428         '75.0.3731.2',
 429         '75.0.3731.0',
 430         '74.0.3729.6',
 431         '73.0.3683.77',
 432         '73.0.3683.76',
 433         '75.0.3730.5',
 434         '75.0.3730.4',
 435         '73.0.3683.75',
 436         '74.0.3729.5',
 437         '73.0.3683.74',
 438         '75.0.3730.3',
 439         '75.0.3730.2',
 440         '74.0.3729.4',
 441         '73.0.3683.73',
 442         '73.0.3683.72',
 443         '75.0.3730.1',
 444         '75.0.3730.0',
 445         '74.0.3729.3',
 446         '73.0.3683.71',
 447         '74.0.3729.2',
 448         '73.0.3683.70',
 449         '74.0.3729.1',
 450         '74.0.3729.0',
 451         '74.0.3726.4',
 452         '73.0.3683.69',
 453         '74.0.3726.3',
 454         '74.0.3728.0',
 455         '74.0.3726.2',
 456         '73.0.3683.68',
 457         '74.0.3726.1',
 458         '74.0.3726.0',
 459         '74.0.3725.4',
 460         '73.0.3683.67',
 461         '73.0.3683.66',
 462         '74.0.3725.3',
 463         '74.0.3725.2',
 464         '74.0.3725.1',
 465         '74.0.3724.8',
 466         '74.0.3725.0',
 467         '73.0.3683.65',
 468         '74.0.3724.7',
 469         '74.0.3724.6',
 470         '74.0.3724.5',
 471         '74.0.3724.4',
 472         '74.0.3724.3',
 473         '74.0.3724.2',
 474         '74.0.3724.1',
 475         '74.0.3724.0',
 476         '73.0.3683.64',
 477         '74.0.3723.1',
 478         '74.0.3723.0',
 479         '73.0.3683.63',
 480         '74.0.3722.1',
 481         '74.0.3722.0',
 482         '73.0.3683.62',
 483         '74.0.3718.9',
 484         '74.0.3702.3',
 485         '74.0.3721.3',
 486         '74.0.3721.2',
 487         '74.0.3721.1',
 488         '74.0.3721.0',
 489         '74.0.3720.6',
 490         '73.0.3683.61',
 491         '72.0.3626.122',
 492         '73.0.3683.60',
 493         '74.0.3720.5',
 494         '72.0.3626.121',
 495         '74.0.3718.8',
 496         '74.0.3720.4',
 497         '74.0.3720.3',
 498         '74.0.3718.7',
 499         '74.0.3720.2',
 500         '74.0.3720.1',
 501         '74.0.3720.0',
 502         '74.0.3718.6',
 503         '74.0.3719.5',
 504         '73.0.3683.59',
 505         '74.0.3718.5',
 506         '74.0.3718.4',
 507         '74.0.3719.4',
 508         '74.0.3719.3',
 509         '74.0.3719.2',
 510         '74.0.3719.1',
 511         '73.0.3683.58',
 512         '74.0.3719.0',
 513         '73.0.3683.57',
 514         '73.0.3683.56',
 515         '74.0.3718.3',
 516         '73.0.3683.55',
 517         '74.0.3718.2',
 518         '74.0.3718.1',
 519         '74.0.3718.0',
 520         '73.0.3683.54',
 521         '74.0.3717.2',
 522         '73.0.3683.53',
 523         '74.0.3717.1',
 524         '74.0.3717.0',
 525         '73.0.3683.52',
 526         '74.0.3716.1',
 527         '74.0.3716.0',
 528         '73.0.3683.51',
 529         '74.0.3715.1',
 530         '74.0.3715.0',
 531         '73.0.3683.50',
 532         '74.0.3711.2',
 533         '74.0.3714.2',
 534         '74.0.3713.3',
 535         '74.0.3714.1',
 536         '74.0.3714.0',
 537         '73.0.3683.49',
 538         '74.0.3713.1',
 539         '74.0.3713.0',
 540         '72.0.3626.120',
 541         '73.0.3683.48',
 542         '74.0.3712.2',
 543         '74.0.3712.1',
 544         '74.0.3712.0',
 545         '73.0.3683.47',
 546         '72.0.3626.119',
 547         '73.0.3683.46',
 548         '74.0.3710.2',
 549         '72.0.3626.118',
 550         '74.0.3711.1',
 551         '74.0.3711.0',
 552         '73.0.3683.45',
 553         '72.0.3626.117',
 554         '74.0.3710.1',
 555         '74.0.3710.0',
 556         '73.0.3683.44',
 557         '72.0.3626.116',
 558         '74.0.3709.1',
 559         '74.0.3709.0',
 560         '74.0.3704.9',
 561         '73.0.3683.43',
 562         '72.0.3626.115',
 563         '74.0.3704.8',
 564         '74.0.3704.7',
 565         '74.0.3708.0',
 566         '74.0.3706.7',
 567         '74.0.3704.6',
 568         '73.0.3683.42',
 569         '72.0.3626.114',
 570         '74.0.3706.6',
 571         '72.0.3626.113',
 572         '74.0.3704.5',
 573         '74.0.3706.5',
 574         '74.0.3706.4',
 575         '74.0.3706.3',
 576         '74.0.3706.2',
 577         '74.0.3706.1',
 578         '74.0.3706.0',
 579         '73.0.3683.41',
 580         '72.0.3626.112',
 581         '74.0.3705.1',
 582         '74.0.3705.0',
 583         '73.0.3683.40',
 584         '72.0.3626.111',
 585         '73.0.3683.39',
 586         '74.0.3704.4',
 587         '73.0.3683.38',
 588         '74.0.3704.3',
 589         '74.0.3704.2',
 590         '74.0.3704.1',
 591         '74.0.3704.0',
 592         '73.0.3683.37',
 593         '72.0.3626.110',
 594         '72.0.3626.109',
 595         '74.0.3703.3',
 596         '74.0.3703.2',
 597         '73.0.3683.36',
 598         '74.0.3703.1',
 599         '74.0.3703.0',
 600         '73.0.3683.35',
 601         '72.0.3626.108',
 602         '74.0.3702.2',
 603         '74.0.3699.3',
 604         '74.0.3702.1',
 605         '74.0.3702.0',
 606         '73.0.3683.34',
 607         '72.0.3626.107',
 608         '73.0.3683.33',
 609         '74.0.3701.1',
 610         '74.0.3701.0',
 611         '73.0.3683.32',
 612         '73.0.3683.31',
 613         '72.0.3626.105',
 614         '74.0.3700.1',
 615         '74.0.3700.0',
 616         '73.0.3683.29',
 617         '72.0.3626.103',
 618         '74.0.3699.2',
 619         '74.0.3699.1',
 620         '74.0.3699.0',
 621         '73.0.3683.28',
 622         '72.0.3626.102',
 623         '73.0.3683.27',
 624         '73.0.3683.26',
 625         '74.0.3698.0',
 626         '74.0.3696.2',
 627         '72.0.3626.101',
 628         '73.0.3683.25',
 629         '74.0.3696.1',
 630         '74.0.3696.0',
 631         '74.0.3694.8',
 632         '72.0.3626.100',
 633         '74.0.3694.7',
 634         '74.0.3694.6',
 635         '74.0.3694.5',
 636         '74.0.3694.4',
 637         '72.0.3626.99',
 638         '72.0.3626.98',
 639         '74.0.3694.3',
 640         '73.0.3683.24',
 641         '72.0.3626.97',
 642         '72.0.3626.96',
 643         '72.0.3626.95',
 644         '73.0.3683.23',
 645         '72.0.3626.94',
 646         '73.0.3683.22',
 647         '73.0.3683.21',
 648         '72.0.3626.93',
 649         '74.0.3694.2',
 650         '72.0.3626.92',
 651         '74.0.3694.1',
 652         '74.0.3694.0',
 653         '74.0.3693.6',
 654         '73.0.3683.20',
 655         '72.0.3626.91',
 656         '74.0.3693.5',
 657         '74.0.3693.4',
 658         '74.0.3693.3',
 659         '74.0.3693.2',
 660         '73.0.3683.19',
 661         '74.0.3693.1',
 662         '74.0.3693.0',
 663         '73.0.3683.18',
 664         '72.0.3626.90',
 665         '74.0.3692.1',
 666         '74.0.3692.0',
 667         '73.0.3683.17',
 668         '72.0.3626.89',
 669         '74.0.3687.3',
 670         '74.0.3691.1',
 671         '74.0.3691.0',
 672         '73.0.3683.16',
 673         '72.0.3626.88',
 674         '72.0.3626.87',
 675         '73.0.3683.15',
 676         '74.0.3690.1',
 677         '74.0.3690.0',
 678         '73.0.3683.14',
 679         '72.0.3626.86',
 680         '73.0.3683.13',
 681         '73.0.3683.12',
 682         '74.0.3689.1',
 683         '74.0.3689.0',
 684         '73.0.3683.11',
 685         '72.0.3626.85',
 686         '73.0.3683.10',
 687         '72.0.3626.84',
 688         '73.0.3683.9',
 689         '74.0.3688.1',
 690         '74.0.3688.0',
 691         '73.0.3683.8',
 692         '72.0.3626.83',
 693         '74.0.3687.2',
 694         '74.0.3687.1',
 695         '74.0.3687.0',
 696         '73.0.3683.7',
 697         '72.0.3626.82',
 698         '74.0.3686.4',
 699         '72.0.3626.81',
 700         '74.0.3686.3',
 701         '74.0.3686.2',
 702         '74.0.3686.1',
 703         '74.0.3686.0',
 704         '73.0.3683.6',
 705         '72.0.3626.80',
 706         '74.0.3685.1',
 707         '74.0.3685.0',
 708         '73.0.3683.5',
 709         '72.0.3626.79',
 710         '74.0.3684.1',
 711         '74.0.3684.0',
 712         '73.0.3683.4',
 713         '72.0.3626.78',
 714         '72.0.3626.77',
 715         '73.0.3683.3',
 716         '73.0.3683.2',
 717         '72.0.3626.76',
 718         '73.0.3683.1',
 719         '73.0.3683.0',
 720         '72.0.3626.75',
 721         '71.0.3578.141',
 722         '73.0.3682.1',
 723         '73.0.3682.0',
 724         '72.0.3626.74',
 725         '71.0.3578.140',
 726         '73.0.3681.4',
 727         '73.0.3681.3',
 728         '73.0.3681.2',
 729         '73.0.3681.1',
 730         '73.0.3681.0',
 731         '72.0.3626.73',
 732         '71.0.3578.139',
 733         '72.0.3626.72',
 734         '72.0.3626.71',
 735         '73.0.3680.1',
 736         '73.0.3680.0',
 737         '72.0.3626.70',
 738         '71.0.3578.138',
 739         '73.0.3678.2',
 740         '73.0.3679.1',
 741         '73.0.3679.0',
 742         '72.0.3626.69',
 743         '71.0.3578.137',
 744         '73.0.3678.1',
 745         '73.0.3678.0',
 746         '71.0.3578.136',
 747         '73.0.3677.1',
 748         '73.0.3677.0',
 749         '72.0.3626.68',
 750         '72.0.3626.67',
 751         '71.0.3578.135',
 752         '73.0.3676.1',
 753         '73.0.3676.0',
 754         '73.0.3674.2',
 755         '72.0.3626.66',
 756         '71.0.3578.134',
 757         '73.0.3674.1',
 758         '73.0.3674.0',
 759         '72.0.3626.65',
 760         '71.0.3578.133',
 761         '73.0.3673.2',
 762         '73.0.3673.1',
 763         '73.0.3673.0',
 764         '72.0.3626.64',
 765         '71.0.3578.132',
 766         '72.0.3626.63',
 767         '72.0.3626.62',
 768         '72.0.3626.61',
 769         '72.0.3626.60',
 770         '73.0.3672.1',
 771         '73.0.3672.0',
 772         '72.0.3626.59',
 773         '71.0.3578.131',
 774         '73.0.3671.3',
 775         '73.0.3671.2',
 776         '73.0.3671.1',
 777         '73.0.3671.0',
 778         '72.0.3626.58',
 779         '71.0.3578.130',
 780         '73.0.3670.1',
 781         '73.0.3670.0',
 782         '72.0.3626.57',
 783         '71.0.3578.129',
 784         '73.0.3669.1',
 785         '73.0.3669.0',
 786         '72.0.3626.56',
 787         '71.0.3578.128',
 788         '73.0.3668.2',
 789         '73.0.3668.1',
 790         '73.0.3668.0',
 791         '72.0.3626.55',
 792         '71.0.3578.127',
 793         '73.0.3667.2',
 794         '73.0.3667.1',
 795         '73.0.3667.0',
 796         '72.0.3626.54',
 797         '71.0.3578.126',
 798         '73.0.3666.1',
 799         '73.0.3666.0',
 800         '72.0.3626.53',
 801         '71.0.3578.125',
 802         '73.0.3665.4',
 803         '73.0.3665.3',
 804         '72.0.3626.52',
 805         '73.0.3665.2',
 806         '73.0.3664.4',
 807         '73.0.3665.1',
 808         '73.0.3665.0',
 809         '72.0.3626.51',
 810         '71.0.3578.124',
 811         '72.0.3626.50',
 812         '73.0.3664.3',
 813         '73.0.3664.2',
 814         '73.0.3664.1',
 815         '73.0.3664.0',
 816         '73.0.3663.2',
 817         '72.0.3626.49',
 818         '71.0.3578.123',
 819         '73.0.3663.1',
 820         '73.0.3663.0',
 821         '72.0.3626.48',
 822         '71.0.3578.122',
 823         '73.0.3662.1',
 824         '73.0.3662.0',
 825         '72.0.3626.47',
 826         '71.0.3578.121',
 827         '73.0.3661.1',
 828         '72.0.3626.46',
 829         '73.0.3661.0',
 830         '72.0.3626.45',
 831         '71.0.3578.120',
 832         '73.0.3660.2',
 833         '73.0.3660.1',
 834         '73.0.3660.0',
 835         '72.0.3626.44',
 836         '71.0.3578.119',
 837         '73.0.3659.1',
 838         '73.0.3659.0',
 839         '72.0.3626.43',
 840         '71.0.3578.118',
 841         '73.0.3658.1',
 842         '73.0.3658.0',
 843         '72.0.3626.42',
 844         '71.0.3578.117',
 845         '73.0.3657.1',
 846         '73.0.3657.0',
 847         '72.0.3626.41',
 848         '71.0.3578.116',
 849         '73.0.3656.1',
 850         '73.0.3656.0',
 851         '72.0.3626.40',
 852         '71.0.3578.115',
 853         '73.0.3655.1',
 854         '73.0.3655.0',
 855         '72.0.3626.39',
 856         '71.0.3578.114',
 857         '73.0.3654.1',
 858         '73.0.3654.0',
 859         '72.0.3626.38',
 860         '71.0.3578.113',
 861         '73.0.3653.1',
 862         '73.0.3653.0',
 863         '72.0.3626.37',
 864         '71.0.3578.112',
 865         '73.0.3652.1',
 866         '73.0.3652.0',
 867         '72.0.3626.36',
 868         '71.0.3578.111',
 869         '73.0.3651.1',
 870         '73.0.3651.0',
 871         '72.0.3626.35',
 872         '71.0.3578.110',
 873         '73.0.3650.1',
 874         '73.0.3650.0',
 875         '72.0.3626.34',
 876         '71.0.3578.109',
 877         '73.0.3649.1',
 878         '73.0.3649.0',
 879         '72.0.3626.33',
 880         '71.0.3578.108',
 881         '73.0.3648.2',
 882         '73.0.3648.1',
 883         '73.0.3648.0',
 884         '72.0.3626.32',
 885         '71.0.3578.107',
 886         '73.0.3647.2',
 887         '73.0.3647.1',
 888         '73.0.3647.0',
 889         '72.0.3626.31',
 890         '71.0.3578.106',
 891         '73.0.3635.3',
 892         '73.0.3646.2',
 893         '73.0.3646.1',
 894         '73.0.3646.0',
 895         '72.0.3626.30',
 896         '71.0.3578.105',
 897         '72.0.3626.29',
 898         '73.0.3645.2',
 899         '73.0.3645.1',
 900         '73.0.3645.0',
 901         '72.0.3626.28',
 902         '71.0.3578.104',
 903         '72.0.3626.27',
 904         '72.0.3626.26',
 905         '72.0.3626.25',
 906         '72.0.3626.24',
 907         '73.0.3644.0',
 908         '73.0.3643.2',
 909         '72.0.3626.23',
 910         '71.0.3578.103',
 911         '73.0.3643.1',
 912         '73.0.3643.0',
 913         '72.0.3626.22',
 914         '71.0.3578.102',
 915         '73.0.3642.1',
 916         '73.0.3642.0',
 917         '72.0.3626.21',
 918         '71.0.3578.101',
 919         '73.0.3641.1',
 920         '73.0.3641.0',
 921         '72.0.3626.20',
 922         '71.0.3578.100',
 923         '72.0.3626.19',
 924         '73.0.3640.1',
 925         '73.0.3640.0',
 926         '72.0.3626.18',
 927         '73.0.3639.1',
 928         '71.0.3578.99',
 929         '73.0.3639.0',
 930         '72.0.3626.17',
 931         '73.0.3638.2',
 932         '72.0.3626.16',
 933         '73.0.3638.1',
 934         '73.0.3638.0',
 935         '72.0.3626.15',
 936         '71.0.3578.98',
 937         '73.0.3635.2',
 938         '71.0.3578.97',
 939         '73.0.3637.1',
 940         '73.0.3637.0',
 941         '72.0.3626.14',
 942         '71.0.3578.96',
 943         '71.0.3578.95',
 944         '72.0.3626.13',
 945         '71.0.3578.94',
 946         '73.0.3636.2',
 947         '71.0.3578.93',
 948         '73.0.3636.1',
 949         '73.0.3636.0',
 950         '72.0.3626.12',
 951         '71.0.3578.92',
 952         '73.0.3635.1',
 953         '73.0.3635.0',
 954         '72.0.3626.11',
 955         '71.0.3578.91',
 956         '73.0.3634.2',
 957         '73.0.3634.1',
 958         '73.0.3634.0',
 959         '72.0.3626.10',
 960         '71.0.3578.90',
 961         '71.0.3578.89',
 962         '73.0.3633.2',
 963         '73.0.3633.1',
 964         '73.0.3633.0',
 965         '72.0.3610.4',
 966         '72.0.3626.9',
 967         '71.0.3578.88',
 968         '73.0.3632.5',
 969         '73.0.3632.4',
 970         '73.0.3632.3',
 971         '73.0.3632.2',
 972         '73.0.3632.1',
 973         '73.0.3632.0',
 974         '72.0.3626.8',
 975         '71.0.3578.87',
 976         '73.0.3631.2',
 977         '73.0.3631.1',
 978         '73.0.3631.0',
 979         '72.0.3626.7',
 980         '71.0.3578.86',
 981         '72.0.3626.6',
 982         '73.0.3630.1',
 983         '73.0.3630.0',
 984         '72.0.3626.5',
 985         '71.0.3578.85',
 986         '72.0.3626.4',
 987         '73.0.3628.3',
 988         '73.0.3628.2',
 989         '73.0.3629.1',
 990         '73.0.3629.0',
 991         '72.0.3626.3',
 992         '71.0.3578.84',
 993         '73.0.3628.1',
 994         '73.0.3628.0',
 995         '71.0.3578.83',
 996         '73.0.3627.1',
 997         '73.0.3627.0',
 998         '72.0.3626.2',
 999         '71.0.3578.82',
1000         '71.0.3578.81',
1001         '71.0.3578.80',
1002         '72.0.3626.1',
1003         '72.0.3626.0',
1004         '71.0.3578.79',
1005         '70.0.3538.124',
1006         '71.0.3578.78',
1007         '72.0.3623.4',
1008         '72.0.3625.2',
1009         '72.0.3625.1',
1010         '72.0.3625.0',
1011         '71.0.3578.77',
1012         '70.0.3538.123',
1013         '72.0.3624.4',
1014         '72.0.3624.3',
1015         '72.0.3624.2',
1016         '71.0.3578.76',
1017         '72.0.3624.1',
1018         '72.0.3624.0',
1019         '72.0.3623.3',
1020         '71.0.3578.75',
1021         '70.0.3538.122',
1022         '71.0.3578.74',
1023         '72.0.3623.2',
1024         '72.0.3610.3',
1025         '72.0.3623.1',
1026         '72.0.3623.0',
1027         '72.0.3622.3',
1028         '72.0.3622.2',
1029         '71.0.3578.73',
1030         '70.0.3538.121',
1031         '72.0.3622.1',
1032         '72.0.3622.0',
1033         '71.0.3578.72',
1034         '70.0.3538.120',
1035         '72.0.3621.1',
1036         '72.0.3621.0',
1037         '71.0.3578.71',
1038         '70.0.3538.119',
1039         '72.0.3620.1',
1040         '72.0.3620.0',
1041         '71.0.3578.70',
1042         '70.0.3538.118',
1043         '71.0.3578.69',
1044         '72.0.3619.1',
1045         '72.0.3619.0',
1046         '71.0.3578.68',
1047         '70.0.3538.117',
1048         '71.0.3578.67',
1049         '72.0.3618.1',
1050         '72.0.3618.0',
1051         '71.0.3578.66',
1052         '70.0.3538.116',
1053         '72.0.3617.1',
1054         '72.0.3617.0',
1055         '71.0.3578.65',
1056         '70.0.3538.115',
1057         '72.0.3602.3',
1058         '71.0.3578.64',
1059         '72.0.3616.1',
1060         '72.0.3616.0',
1061         '71.0.3578.63',
1062         '70.0.3538.114',
1063         '71.0.3578.62',
1064         '72.0.3615.1',
1065         '72.0.3615.0',
1066         '71.0.3578.61',
1067         '70.0.3538.113',
1068         '72.0.3614.1',
1069         '72.0.3614.0',
1070         '71.0.3578.60',
1071         '70.0.3538.112',
1072         '72.0.3613.1',
1073         '72.0.3613.0',
1074         '71.0.3578.59',
1075         '70.0.3538.111',
1076         '72.0.3612.2',
1077         '72.0.3612.1',
1078         '72.0.3612.0',
1079         '70.0.3538.110',
1080         '71.0.3578.58',
1081         '70.0.3538.109',
1082         '72.0.3611.2',
1083         '72.0.3611.1',
1084         '72.0.3611.0',
1085         '71.0.3578.57',
1086         '70.0.3538.108',
1087         '72.0.3610.2',
1088         '71.0.3578.56',
1089         '71.0.3578.55',
1090         '72.0.3610.1',
1091         '72.0.3610.0',
1092         '71.0.3578.54',
1093         '70.0.3538.107',
1094         '71.0.3578.53',
1095         '72.0.3609.3',
1096         '71.0.3578.52',
1097         '72.0.3609.2',
1098         '71.0.3578.51',
1099         '72.0.3608.5',
1100         '72.0.3609.1',
1101         '72.0.3609.0',
1102         '71.0.3578.50',
1103         '70.0.3538.106',
1104         '72.0.3608.4',
1105         '72.0.3608.3',
1106         '72.0.3608.2',
1107         '71.0.3578.49',
1108         '72.0.3608.1',
1109         '72.0.3608.0',
1110         '70.0.3538.105',
1111         '71.0.3578.48',
1112         '72.0.3607.1',
1113         '72.0.3607.0',
1114         '71.0.3578.47',
1115         '70.0.3538.104',
1116         '72.0.3606.2',
1117         '72.0.3606.1',
1118         '72.0.3606.0',
1119         '71.0.3578.46',
1120         '70.0.3538.103',
1121         '70.0.3538.102',
1122         '72.0.3605.3',
1123         '72.0.3605.2',
1124         '72.0.3605.1',
1125         '72.0.3605.0',
1126         '71.0.3578.45',
1127         '70.0.3538.101',
1128         '71.0.3578.44',
1129         '71.0.3578.43',
1130         '70.0.3538.100',
1131         '70.0.3538.99',
1132         '71.0.3578.42',
1133         '72.0.3604.1',
1134         '72.0.3604.0',
1135         '71.0.3578.41',
1136         '70.0.3538.98',
1137         '71.0.3578.40',
1138         '72.0.3603.2',
1139         '72.0.3603.1',
1140         '72.0.3603.0',
1141         '71.0.3578.39',
1142         '70.0.3538.97',
1143         '72.0.3602.2',
1144         '71.0.3578.38',
1145         '71.0.3578.37',
1146         '72.0.3602.1',
1147         '72.0.3602.0',
1148         '71.0.3578.36',
1149         '70.0.3538.96',
1150         '72.0.3601.1',
1151         '72.0.3601.0',
1152         '71.0.3578.35',
1153         '70.0.3538.95',
1154         '72.0.3600.1',
1155         '72.0.3600.0',
1156         '71.0.3578.34',
1157         '70.0.3538.94',
1158         '72.0.3599.3',
1159         '72.0.3599.2',
1160         '72.0.3599.1',
1161         '72.0.3599.0',
1162         '71.0.3578.33',
1163         '70.0.3538.93',
1164         '72.0.3598.1',
1165         '72.0.3598.0',
1166         '71.0.3578.32',
1167         '70.0.3538.87',
1168         '72.0.3597.1',
1169         '72.0.3597.0',
1170         '72.0.3596.2',
1171         '71.0.3578.31',
1172         '70.0.3538.86',
1173         '71.0.3578.30',
1174         '71.0.3578.29',
1175         '72.0.3596.1',
1176         '72.0.3596.0',
1177         '71.0.3578.28',
1178         '70.0.3538.85',
1179         '72.0.3595.2',
1180         '72.0.3591.3',
1181         '72.0.3595.1',
1182         '72.0.3595.0',
1183         '71.0.3578.27',
1184         '70.0.3538.84',
1185         '72.0.3594.1',
1186         '72.0.3594.0',
1187         '71.0.3578.26',
1188         '70.0.3538.83',
1189         '72.0.3593.2',
1190         '72.0.3593.1',
1191         '72.0.3593.0',
1192         '71.0.3578.25',
1193         '70.0.3538.82',
1194         '72.0.3589.3',
1195         '72.0.3592.2',
1196         '72.0.3592.1',
1197         '72.0.3592.0',
1198         '71.0.3578.24',
1199         '72.0.3589.2',
1200         '70.0.3538.81',
1201         '70.0.3538.80',
1202         '72.0.3591.2',
1203         '72.0.3591.1',
1204         '72.0.3591.0',
1205         '71.0.3578.23',
1206         '70.0.3538.79',
1207         '71.0.3578.22',
1208         '72.0.3590.1',
1209         '72.0.3590.0',
1210         '71.0.3578.21',
1211         '70.0.3538.78',
1212         '70.0.3538.77',
1213         '72.0.3589.1',
1214         '72.0.3589.0',
1215         '71.0.3578.20',
1216         '70.0.3538.76',
1217         '71.0.3578.19',
1218         '70.0.3538.75',
1219         '72.0.3588.1',
1220         '72.0.3588.0',
1221         '71.0.3578.18',
1222         '70.0.3538.74',
1223         '72.0.3586.2',
1224         '72.0.3587.0',
1225         '71.0.3578.17',
1226         '70.0.3538.73',
1227         '72.0.3586.1',
1228         '72.0.3586.0',
1229         '71.0.3578.16',
1230         '70.0.3538.72',
1231         '72.0.3585.1',
1232         '72.0.3585.0',
1233         '71.0.3578.15',
1234         '70.0.3538.71',
1235         '71.0.3578.14',
1236         '72.0.3584.1',
1237         '72.0.3584.0',
1238         '71.0.3578.13',
1239         '70.0.3538.70',
1240         '72.0.3583.2',
1241         '71.0.3578.12',
1242         '72.0.3583.1',
1243         '72.0.3583.0',
1244         '71.0.3578.11',
1245         '70.0.3538.69',
1246         '71.0.3578.10',
1247         '72.0.3582.0',
1248         '72.0.3581.4',
1249         '71.0.3578.9',
1250         '70.0.3538.67',
1251         '72.0.3581.3',
1252         '72.0.3581.2',
1253         '72.0.3581.1',
1254         '72.0.3581.0',
1255         '71.0.3578.8',
1256         '70.0.3538.66',
1257         '72.0.3580.1',
1258         '72.0.3580.0',
1259         '71.0.3578.7',
1260         '70.0.3538.65',
1261         '71.0.3578.6',
1262         '72.0.3579.1',
1263         '72.0.3579.0',
1264         '71.0.3578.5',
1265         '70.0.3538.64',
1266         '71.0.3578.4',
1267         '71.0.3578.3',
1268         '71.0.3578.2',
1269         '71.0.3578.1',
1270         '71.0.3578.0',
1271         '70.0.3538.63',
1272         '69.0.3497.128',
1273         '70.0.3538.62',
1274         '70.0.3538.61',
1275         '70.0.3538.60',
1276         '70.0.3538.59',
1277         '71.0.3577.1',
1278         '71.0.3577.0',
1279         '70.0.3538.58',
1280         '69.0.3497.127',
1281         '71.0.3576.2',
1282         '71.0.3576.1',
1283         '71.0.3576.0',
1284         '70.0.3538.57',
1285         '70.0.3538.56',
1286         '71.0.3575.2',
1287         '70.0.3538.55',
1288         '69.0.3497.126',
1289         '70.0.3538.54',
1290         '71.0.3575.1',
1291         '71.0.3575.0',
1292         '71.0.3574.1',
1293         '71.0.3574.0',
1294         '70.0.3538.53',
1295         '69.0.3497.125',
1296         '70.0.3538.52',
1297         '71.0.3573.1',
1298         '71.0.3573.0',
1299         '70.0.3538.51',
1300         '69.0.3497.124',
1301         '71.0.3572.1',
1302         '71.0.3572.0',
1303         '70.0.3538.50',
1304         '69.0.3497.123',
1305         '71.0.3571.2',
1306         '70.0.3538.49',
1307         '69.0.3497.122',
1308         '71.0.3571.1',
1309         '71.0.3571.0',
1310         '70.0.3538.48',
1311         '69.0.3497.121',
1312         '71.0.3570.1',
1313         '71.0.3570.0',
1314         '70.0.3538.47',
1315         '69.0.3497.120',
1316         '71.0.3568.2',
1317         '71.0.3569.1',
1318         '71.0.3569.0',
1319         '70.0.3538.46',
1320         '69.0.3497.119',
1321         '70.0.3538.45',
1322         '71.0.3568.1',
1323         '71.0.3568.0',
1324         '70.0.3538.44',
1325         '69.0.3497.118',
1326         '70.0.3538.43',
1327         '70.0.3538.42',
1328         '71.0.3567.1',
1329         '71.0.3567.0',
1330         '70.0.3538.41',
1331         '69.0.3497.117',
1332         '71.0.3566.1',
1333         '71.0.3566.0',
1334         '70.0.3538.40',
1335         '69.0.3497.116',
1336         '71.0.3565.1',
1337         '71.0.3565.0',
1338         '70.0.3538.39',
1339         '69.0.3497.115',
1340         '71.0.3564.1',
1341         '71.0.3564.0',
1342         '70.0.3538.38',
1343         '69.0.3497.114',
1344         '71.0.3563.0',
1345         '71.0.3562.2',
1346         '70.0.3538.37',
1347         '69.0.3497.113',
1348         '70.0.3538.36',
1349         '70.0.3538.35',
1350         '71.0.3562.1',
1351         '71.0.3562.0',
1352         '70.0.3538.34',
1353         '69.0.3497.112',
1354         '70.0.3538.33',
1355         '71.0.3561.1',
1356         '71.0.3561.0',
1357         '70.0.3538.32',
1358         '69.0.3497.111',
1359         '71.0.3559.6',
1360         '71.0.3560.1',
1361         '71.0.3560.0',
1362         '71.0.3559.5',
1363         '71.0.3559.4',
1364         '70.0.3538.31',
1365         '69.0.3497.110',
1366         '71.0.3559.3',
1367         '70.0.3538.30',
1368         '69.0.3497.109',
1369         '71.0.3559.2',
1370         '71.0.3559.1',
1371         '71.0.3559.0',
1372         '70.0.3538.29',
1373         '69.0.3497.108',
1374         '71.0.3558.2',
1375         '71.0.3558.1',
1376         '71.0.3558.0',
1377         '70.0.3538.28',
1378         '69.0.3497.107',
1379         '71.0.3557.2',
1380         '71.0.3557.1',
1381         '71.0.3557.0',
1382         '70.0.3538.27',
1383         '69.0.3497.106',
1384         '71.0.3554.4',
1385         '70.0.3538.26',
1386         '71.0.3556.1',
1387         '71.0.3556.0',
1388         '70.0.3538.25',
1389         '71.0.3554.3',
1390         '69.0.3497.105',
1391         '71.0.3554.2',
1392         '70.0.3538.24',
1393         '69.0.3497.104',
1394         '71.0.3555.2',
1395         '70.0.3538.23',
1396         '71.0.3555.1',
1397         '71.0.3555.0',
1398         '70.0.3538.22',
1399         '69.0.3497.103',
1400         '71.0.3554.1',
1401         '71.0.3554.0',
1402         '70.0.3538.21',
1403         '69.0.3497.102',
1404         '71.0.3553.3',
1405         '70.0.3538.20',
1406         '69.0.3497.101',
1407         '71.0.3553.2',
1408         '69.0.3497.100',
1409         '71.0.3553.1',
1410         '71.0.3553.0',
1411         '70.0.3538.19',
1412         '69.0.3497.99',
1413         '69.0.3497.98',
1414         '69.0.3497.97',
1415         '71.0.3552.6',
1416         '71.0.3552.5',
1417         '71.0.3552.4',
1418         '71.0.3552.3',
1419         '71.0.3552.2',
1420         '71.0.3552.1',
1421         '71.0.3552.0',
1422         '70.0.3538.18',
1423         '69.0.3497.96',
1424         '71.0.3551.3',
1425         '71.0.3551.2',
1426         '71.0.3551.1',
1427         '71.0.3551.0',
1428         '70.0.3538.17',
1429         '69.0.3497.95',
1430         '71.0.3550.3',
1431         '71.0.3550.2',
1432         '71.0.3550.1',
1433         '71.0.3550.0',
1434         '70.0.3538.16',
1435         '69.0.3497.94',
1436         '71.0.3549.1',
1437         '71.0.3549.0',
1438         '70.0.3538.15',
1439         '69.0.3497.93',
1440         '69.0.3497.92',
1441         '71.0.3548.1',
1442         '71.0.3548.0',
1443         '70.0.3538.14',
1444         '69.0.3497.91',
1445         '71.0.3547.1',
1446         '71.0.3547.0',
1447         '70.0.3538.13',
1448         '69.0.3497.90',
1449         '71.0.3546.2',
1450         '69.0.3497.89',
1451         '71.0.3546.1',
1452         '71.0.3546.0',
1453         '70.0.3538.12',
1454         '69.0.3497.88',
1455         '71.0.3545.4',
1456         '71.0.3545.3',
1457         '71.0.3545.2',
1458         '71.0.3545.1',
1459         '71.0.3545.0',
1460         '70.0.3538.11',
1461         '69.0.3497.87',
1462         '71.0.3544.5',
1463         '71.0.3544.4',
1464         '71.0.3544.3',
1465         '71.0.3544.2',
1466         '71.0.3544.1',
1467         '71.0.3544.0',
1468         '69.0.3497.86',
1469         '70.0.3538.10',
1470         '69.0.3497.85',
1471         '70.0.3538.9',
1472         '69.0.3497.84',
1473         '71.0.3543.4',
1474         '70.0.3538.8',
1475         '71.0.3543.3',
1476         '71.0.3543.2',
1477         '71.0.3543.1',
1478         '71.0.3543.0',
1479         '70.0.3538.7',
1480         '69.0.3497.83',
1481         '71.0.3542.2',
1482         '71.0.3542.1',
1483         '71.0.3542.0',
1484         '70.0.3538.6',
1485         '69.0.3497.82',
1486         '69.0.3497.81',
1487         '71.0.3541.1',
1488         '71.0.3541.0',
1489         '70.0.3538.5',
1490         '69.0.3497.80',
1491         '71.0.3540.1',
1492         '71.0.3540.0',
1493         '70.0.3538.4',
1494         '69.0.3497.79',
1495         '70.0.3538.3',
1496         '71.0.3539.1',
1497         '71.0.3539.0',
1498         '69.0.3497.78',
1499         '68.0.3440.134',
1500         '69.0.3497.77',
1501         '70.0.3538.2',
1502         '70.0.3538.1',
1503         '70.0.3538.0',
1504         '69.0.3497.76',
1505         '68.0.3440.133',
1506         '69.0.3497.75',
1507         '70.0.3537.2',
1508         '70.0.3537.1',
1509         '70.0.3537.0',
1510         '69.0.3497.74',
1511         '68.0.3440.132',
1512         '70.0.3536.0',
1513         '70.0.3535.5',
1514         '70.0.3535.4',
1515         '70.0.3535.3',
1516         '69.0.3497.73',
1517         '68.0.3440.131',
1518         '70.0.3532.8',
1519         '70.0.3532.7',
1520         '69.0.3497.72',
1521         '69.0.3497.71',
1522         '70.0.3535.2',
1523         '70.0.3535.1',
1524         '70.0.3535.0',
1525         '69.0.3497.70',
1526         '68.0.3440.130',
1527         '69.0.3497.69',
1528         '68.0.3440.129',
1529         '70.0.3534.4',
1530         '70.0.3534.3',
1531         '70.0.3534.2',
1532         '70.0.3534.1',
1533         '70.0.3534.0',
1534         '69.0.3497.68',
1535         '68.0.3440.128',
1536         '70.0.3533.2',
1537         '70.0.3533.1',
1538         '70.0.3533.0',
1539         '69.0.3497.67',
1540         '68.0.3440.127',
1541         '70.0.3532.6',
1542         '70.0.3532.5',
1543         '70.0.3532.4',
1544         '69.0.3497.66',
1545         '68.0.3440.126',
1546         '70.0.3532.3',
1547         '70.0.3532.2',
1548         '70.0.3532.1',
1549         '69.0.3497.60',
1550         '69.0.3497.65',
1551         '69.0.3497.64',
1552         '70.0.3532.0',
1553         '70.0.3531.0',
1554         '70.0.3530.4',
1555         '70.0.3530.3',
1556         '70.0.3530.2',
1557         '69.0.3497.58',
1558         '68.0.3440.125',
1559         '69.0.3497.57',
1560         '69.0.3497.56',
1561         '69.0.3497.55',
1562         '69.0.3497.54',
1563         '70.0.3530.1',
1564         '70.0.3530.0',
1565         '69.0.3497.53',
1566         '68.0.3440.124',
1567         '69.0.3497.52',
1568         '70.0.3529.3',
1569         '70.0.3529.2',
1570         '70.0.3529.1',
1571         '70.0.3529.0',
1572         '69.0.3497.51',
1573         '70.0.3528.4',
1574         '68.0.3440.123',
1575         '70.0.3528.3',
1576         '70.0.3528.2',
1577         '70.0.3528.1',
1578         '70.0.3528.0',
1579         '69.0.3497.50',
1580         '68.0.3440.122',
1581         '70.0.3527.1',
1582         '70.0.3527.0',
1583         '69.0.3497.49',
1584         '68.0.3440.121',
1585         '70.0.3526.1',
1586         '70.0.3526.0',
1587         '68.0.3440.120',
1588         '69.0.3497.48',
1589         '69.0.3497.47',
1590         '68.0.3440.119',
1591         '68.0.3440.118',
1592         '70.0.3525.5',
1593         '70.0.3525.4',
1594         '70.0.3525.3',
1595         '68.0.3440.117',
1596         '69.0.3497.46',
1597         '70.0.3525.2',
1598         '70.0.3525.1',
1599         '70.0.3525.0',
1600         '69.0.3497.45',
1601         '68.0.3440.116',
1602         '70.0.3524.4',
1603         '70.0.3524.3',
1604         '69.0.3497.44',
1605         '70.0.3524.2',
1606         '70.0.3524.1',
1607         '70.0.3524.0',
1608         '70.0.3523.2',
1609         '69.0.3497.43',
1610         '68.0.3440.115',
1611         '70.0.3505.9',
1612         '69.0.3497.42',
1613         '70.0.3505.8',
1614         '70.0.3523.1',
1615         '70.0.3523.0',
1616         '69.0.3497.41',
1617         '68.0.3440.114',
1618         '70.0.3505.7',
1619         '69.0.3497.40',
1620         '70.0.3522.1',
1621         '70.0.3522.0',
1622         '70.0.3521.2',
1623         '69.0.3497.39',
1624         '68.0.3440.113',
1625         '70.0.3505.6',
1626         '70.0.3521.1',
1627         '70.0.3521.0',
1628         '69.0.3497.38',
1629         '68.0.3440.112',
1630         '70.0.3520.1',
1631         '70.0.3520.0',
1632         '69.0.3497.37',
1633         '68.0.3440.111',
1634         '70.0.3519.3',
1635         '70.0.3519.2',
1636         '70.0.3519.1',
1637         '70.0.3519.0',
1638         '69.0.3497.36',
1639         '68.0.3440.110',
1640         '70.0.3518.1',
1641         '70.0.3518.0',
1642         '69.0.3497.35',
1643         '69.0.3497.34',
1644         '68.0.3440.109',
1645         '70.0.3517.1',
1646         '70.0.3517.0',
1647         '69.0.3497.33',
1648         '68.0.3440.108',
1649         '69.0.3497.32',
1650         '70.0.3516.3',
1651         '70.0.3516.2',
1652         '70.0.3516.1',
1653         '70.0.3516.0',
1654         '69.0.3497.31',
1655         '68.0.3440.107',
1656         '70.0.3515.4',
1657         '68.0.3440.106',
1658         '70.0.3515.3',
1659         '70.0.3515.2',
1660         '70.0.3515.1',
1661         '70.0.3515.0',
1662         '69.0.3497.30',
1663         '68.0.3440.105',
1664         '68.0.3440.104',
1665         '70.0.3514.2',
1666         '70.0.3514.1',
1667         '70.0.3514.0',
1668         '69.0.3497.29',
1669         '68.0.3440.103',
1670         '70.0.3513.1',
1671         '70.0.3513.0',
1672         '69.0.3497.28',
1673     )
1674     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# Standard HTTP headers. The User-Agent value is produced by a single call to
# random_user_agent() made when this module is imported.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1684
1685
# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1689
1690
# Unique sentinel meaning "no default value was supplied". It is compared by
# identity (`is NO_DEFAULT`), so None can still be passed as a real default.
NO_DEFAULT = object()
1692
# Full English month names, ordered January to December
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code; every list is ordered January to December
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1703
# Known file extensions, written without the leading dot
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1718
# Needed for sanitizing filenames in restricted mode: maps each accented
# character to its ASCII replacement. Replacements longer than one letter
# ('AE', 'OE', 'TH', 'ss', 'ae', 'oe', 'th') are wrapped in lists so that
# itertools.chain yields them whole instead of letter by letter.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1723
# strftime/strptime-style date format strings. This tuple is the common base
# that DATE_FORMATS_DAY_FIRST and DATE_FORMATS_MONTH_FIRST are built from.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)
1763
# The common formats followed by the numeric ones that put the day first
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]
1773
# The common formats followed by the numeric ones that put the month first
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1782
# Matches a fragment of the form }('...',<digits>,<digits>,'...'.split('|')
# and captures the four arguments.
# NOTE(review): going by the name this is the tail of packed JavaScript code;
# confirm against the callers.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script> element whose type attribute is application/ld+json
# (optionally quoted, case-insensitive, across lines) and captures the
# element body as the named group `json_ld`.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1785
1786
def preferredencoding():
    """Get preferred encoding.

    Asks locale.getpreferredencoding() for the system encoding and checks
    that it can actually encode text; falls back to 'UTF-8' when the lookup
    or the trial encoding fails.
    """
    fallback = 'UTF-8'
    try:
        candidate = locale.getpreferredencoding()
        # Trial run: rejects encodings Python does not know about
        'TEST'.encode(candidate)
    except Exception:
        return fallback
    return candidate
1800
1801
def write_json_file(obj, fn):
    """ Serialize obj as JSON into the file fn, atomically if possible

    The JSON is first written to a temporary file created next to fn, which
    is then renamed over fn. If anything fails, the temporary file is removed
    and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    is_py2 = sys.version_info < (3, 0)

    if is_py2 and sys.platform != 'win32':
        fs_encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(fs_encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(fs_encoding)
    else:
        path_basename, path_dirname = os.path.basename, os.path.dirname

    tmp_kwargs = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if is_py2:
        tmp_kwargs['mode'] = 'wb'
    else:
        tmp_kwargs['mode'] = 'w'
        tmp_kwargs['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_kwargs))

    try:
        with tf:
            json.dump(obj, tf)

        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass

        # Give the file the permissions a regularly created file would get:
        # os.umask(0) returns the current mask, which is restored right away
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass

        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file before re-raising
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Return the first element matching xpath[@key] or xpath[@key='val'] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Return the first element matching xpath that has attribute key (equal to val, if given) """
        candidates = node.findall(compat_xpath(xpath))
        return next(
            (el for el in candidates
             if key in el.attrib and (val is None or el.attrib.get(key) == val)),
            None)
1876
# On Python 2.6, the xml.etree.ElementTree.Element methods don't support
# the namespaces parameter
1879
1880
def xpath_with_ns(path, ns_map):
    """Expand every 'prefix:tag' step of path into '{namespace}tag'.

    Namespaces are looked up in ns_map by prefix; steps without a prefix
    are left untouched.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
1891
1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first sub-element of node matching xpath.

    xpath is either a single path string or an iterable of path strings
    that are tried in order until one of them matches.

    Returns the matching element. When nothing matches, returns default if
    one was given, otherwise raises ExtractorError if fatal is true,
    otherwise returns None. name, if given, replaces the xpath in the error
    message.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty iterable counts as "not found"
        # instead of leaving n unbound
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Wrap in a 1-tuple: name may itself be a tuple of xpaths, which
            # the % operator would otherwise unpack as format arguments
            raise ExtractorError('Could not find XML element %s' % (name,))
        else:
            return None
    return n
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    Whatever xpath_element() returns for a missing element (None or default)
    is passed through unchanged. When the element exists but has no text,
    returns default if one was given, otherwise raises ExtractorError if
    fatal is true, otherwise returns None.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Wrap in a 1-tuple: name may itself be a tuple of xpaths, which
            # the % operator would otherwise unpack as format arguments
            raise ExtractorError('Could not find XML element\'s text %s' % (name,))
        else:
            return None
    return n.text
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When there is no such element, returns default if one was given,
    otherwise raises ExtractorError if fatal is true, otherwise returns None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]

    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    if name is None:
        name = '%s[@%s]' % (xpath, key)
    raise ExtractorError('Could not find XML attribute %s' % name)
1942
1943
def get_element_by_id(id, html):
    """Return the content of the first tag whose id attribute equals id in the passed HTML document"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
1947
1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying the given class, or None if there is none"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the given attribute value, or None if there is none"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return a list with the content of every tag carrying the given class in the passed HTML document"""
    # The class may be one of several in the attribute value, so match it as
    # a whole word surrounded by any non-quote characters
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case
    it is used as a regular expression fragment.
    """

    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    def _unquote(content):
        # Content that opens with a quote loses its first and last character
        if content.startswith(('"', "'")):
            return content[1:-1]
        return content

    return [
        unescapeHTML(_unquote(mobj.group('content')))
        for mobj in re.finditer(element_re, html)]
1990
1991
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag; empty until one is parsed
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Store the (name, value) pairs of the start tag as a dict, replacing any earlier ones"""
        self.attrs = dict(attrs)
2001
2002
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    If parsing fails with compat_HTMLParseError, the attributes gathered so
    far (possibly none) are returned.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs
2027
2028
def clean_html(html):
    """Turn an HTML snippet into a readable plain-text string

    Line breaks in the markup become spaces, <br> tags and </p><p>
    boundaries become newlines, all remaining tags are dropped and HTML
    entities are decoded. None is passed through unchanged.
    """

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    text = html.replace('\n', ' ')
    substitutions = (
        # Newline vs <br />
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
2044
2045
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    '-' stands for standard output (its binary buffer when there is one).
    If opening fails for a reason other than EACCES, the name is passed
    through sanitize_path() and, when that changes it, opened again; any
    error from this second attempt propagates like one from the standard
    open() function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
2076
2077
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a system timestamp, or None if it cannot be parsed"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2085
2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.

    With restricted set, only a stricter subset of characters is allowed.
    With is_id set, the string is an ID rather than arbitrary text, so only
    the per-character replacement is applied and the result is otherwise
    kept as is.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        codepoint = ord(char)
        if char == '?' or codepoint < 32 or codepoint == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or codepoint > 127):
            return '_'
        return char

    if s == '':
        return ''

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    # Collapse runs of underscores, then drop them from both ends
    result = re.sub(r'__+', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    return result.lstrip('.') or '_'
2128
2129
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On other platforms the path is returned unchanged unless force is true.
    When sanitizing, the path is normalized and, in every component other
    than '.' and '..', each of the characters / < > : " | \\ ? * as well as
    a trailing whitespace character or dot is replaced with '#'. A drive or
    UNC prefix is kept as is.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, tail = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, tail = os.path.splitunc(s)
    elif force:
        drive_or_unc, tail = '', s
    else:
        return s

    # tail is s without the drive/UNC prefix: the split functions return
    # both parts as slices of s, so no separate prefix removal is needed
    norm_path = os.path.normpath(tail).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component left by the separator following the drive
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s.startswith(os.path.sep):
        # startswith (rather than s[0]) keeps an empty path from raising
        # IndexError
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2153
2154
def sanitize_url(url):
    """Repair protocol-less URLs and a few known scheme typos.

    @param url  the URL as found in the wild
    @returns    the URL with `http:` prepended if it started with `//`, with
                the first matching typo fixed, or unchanged otherwise
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # (pattern, replacement) pairs for some common typos seen so far
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for pattern, replacement in typo_fixes:
        # All patterns are anchored, so at most one substitution happens
        fixed, count = re.subn(pattern, replacement, url)
        if count:
            return fixed
    return url
2171
2172
def extract_basic_auth(url):
    """Split HTTP basic auth credentials off a URL.

    @param url  URL that may carry `user:password@` userinfo
    @returns    (url, auth_header): the URL without the userinfo and the
                matching 'Basic ...' value for an Authorization header; a URL
                without userinfo is returned unchanged together with None
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    host = parts.hostname
    # `hostname` strips the brackets of an IPv6 literal; put them back, or
    # the rebuilt netloc ('::1:8080') is no longer a valid host[:port]
    if host and ':' in host:
        host = '[%s]' % host
    netloc = host if parts.port is None else '%s:%d' % (host, parts.port)
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
2183
2184
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request for a cleaned-up URL.

    The URL goes through sanitize_url() and escape_url(). Credentials
    embedded in it are removed and put into the 'Authorization' header of
    the request instead. All other arguments are handed to the Request
    constructor as they are.
    """
    clean_url = escape_url(sanitize_url(url))
    clean_url, auth_header = extract_basic_auth(clean_url)
    if auth_header is not None:
        if len(args) >= 2:
            # the second extra positional argument is used as the headers dict
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2191
2192
def expand_path(s):
    """Expand ~ (first) and shell variables (second) in a path"""
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2196
2197
def orderedSet(iterable):
    """ Return the items of iterable as a list without duplicates, in order of first appearance """
    unique = []
    for item in iterable:
        # Equality-based membership test: items are not required to be hashable
        if item in unique:
            continue
        unique.append(item)
    return unique
2205
2206
2207 def _htmlentity_transform(entity_with_semicolon):
2208     """Transforms an HTML entity to a character."""
2209     entity = entity_with_semicolon[:-1]
2210
2211     # Known non-numeric HTML entity
2212     if entity in compat_html_entities.name2codepoint:
2213         return compat_chr(compat_html_entities.name2codepoint[entity])
2214
2215     # TODO: HTML5 allows entities without a semicolon. For example,
2216     # '&Eacuteric' should be decoded as 'Éric'.
2217     if entity_with_semicolon in compat_html_entities_html5:
2218         return compat_html_entities_html5[entity_with_semicolon]
2219
2220     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2221     if mobj is not None:
2222         numstr = mobj.group(1)
2223         if numstr.startswith('x'):
2224             base = 16
2225             numstr = '0%s' % numstr
2226         else:
2227             base = 10
2228         # See https://github.com/ytdl-org/youtube-dl/issues/7518
2229         try:
2230             return compat_chr(int(numstr, base))
2231         except ValueError:
2232             pass
2233
2234     # Unknown entity in name, return its literal representation
2235     return '&%s;' % entity
2236
2237
def unescapeHTML(s):
    """Replace the HTML entities in s through _htmlentity_transform().

    None is passed through; any other value has to be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(mobj):
        # group(1) is the entity without '&' but including the final ';'
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', decode_entity, s)
2245
2246
def escapeHTML(text):
    """Escape the characters & < > " ' in text as HTML entities"""
    # '&' has to go first, or the '&' of the other entities would be escaped again
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for char, entity in replacements:
        text = text.replace(char, entity)
    return text
2256
2257
def process_communicate_or_kill(p, *args, **kwargs):
    """Call p.communicate(*args, **kwargs) and return its result.

    If that raises anything at all, the process is killed and waited for
    before the exception is re-raised.
    """
    try:
        output = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    else:
        return output
2265
2266
def get_subprocess_encoding():
    """Return the name of the encoding to use for subprocess calls.

    This is preferredencoding() on Windows (major version >= 5) and the
    filesystem encoding elsewhere, with 'utf-8' as the fallback.
    """
    # For subprocess calls, encode with locale encoding
    # Refer to http://stackoverflow.com/a/9951851/35070
    use_locale_encoding = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    encoding = preferredencoding() if use_locale_encoding else sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2277
2278
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess Whether the name is going to be passed to a
                          subprocess; only affects Python 2 on Windows
    @returns s itself where Unicode names can be used directly, otherwise s
             encoded with get_subprocess_encoding() (unencodable characters
             are dropped)
    """

    assert type(s) == compat_str

    unicode_is_fine = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if unicode_is_fine:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2301
2302
def decodeFilename(b, for_subprocess=False):
    """Counterpart of encodeFilename().

    On Python 3, and for anything that is not a byte string, b is returned
    as it is; otherwise it is decoded with get_subprocess_encoding()
    (undecodable bytes are dropped). for_subprocess is not used.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
2312
2313
def encodeArgument(s):
    """Encode a command line argument through encodeFilename(s, True).

    Anything that is not a compat_str is decoded as ASCII first.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # TODO: turn this into an assertion failure once all post processors
    # pass compat_str
    return encodeFilename(s.decode('ascii'), True)
2321
2322
def decodeArgument(b):
    """Decode a command line argument; same as decodeFilename(b, True)"""
    decoded = decodeFilename(b, True)
    return decoded
2325
2326
def decodeOption(optval):
    """Return an option value as compat_str.

    None is passed through and bytes are decoded with preferredencoding();
    a value of any other non-string type fails the assertion.
    """
    if optval is None:
        return None
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
2335
2336
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration as [H:]MM:SS, M:SS or S, depending on its length.

    @param secs   the (non-negative) duration in seconds; may be a float
    @param delim  separator placed between hours, minutes and seconds
    @param msec   append the milliseconds as '.mmm'; without it the
                  fractional part of secs is truncated
    """
    if msec:
        # Round once, on the total, so that the carry reaches the seconds
        # (59.9996 -> 1:00.000) and float noise does not show (2.3 -> 2.300)
        whole, millis = divmod(int(round(secs * 1000)), 1000)
    else:
        whole, millis = int(secs), 0

    # >= rather than >: exactly one minute is 1:00 and one hour 1:00:00
    if whole >= 3600:
        ret = '%d%s%02d%s%02d' % (whole // 3600, delim, (whole % 3600) // 60, delim, whole % 60)
    elif whole >= 60:
        ret = '%d%s%02d' % (whole // 60, delim, whole % 60)
    else:
        ret = '%d' % whole
    return '%s.%03d' % (ret, millis) if msec else ret
2345
2346
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler with the SSL setup this Python supports.

    @param params  options dict; a true 'nocheckcertificate' entry disables
                   certificate verification
    @param kwargs  passed on to YoutubeDLHTTPSHandler
    """
    skip_verification = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        default_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            default_context.check_hostname = False
            default_context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=default_context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    legacy_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        legacy_context.verify_mode = ssl.CERT_NONE
    else:
        legacy_context.verify_mode = ssl.CERT_REQUIRED
    legacy_context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=legacy_context, **kwargs)
2370
2371
def bug_reports_message(before=';'):
    """Return the standard request to report an issue, including update hints.

    @param before  text the request is appended to (after a space). Trailing
                   whitespace is stripped from it; if it is empty or ends a
                   sentence ('.', '!' or '?'), the request starts with a
                   capital letter.
    """
    update_cmd = (
        'type  yt-dlp -U  to update' if ytdl_is_updateable()
        else 'see  https://github.com/yt-dlp/yt-dlp  on how to update')
    msg = ''.join((
        'please report this issue on  https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    ))

    before = before.rstrip()
    starts_sentence = not before or before.endswith(('.', '!', '?'))
    if starts_sentence:
        msg = msg[0].title() + msg[1:]

    if not before:
        return msg
    return before + ' ' + msg
2386
2387
class YoutubeDLError(Exception):
    """Common base class of the YoutubeDL exceptions."""
2391
2392
# Exception classes treated as network errors (see ExtractorError)
network_exceptions = (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error)
if hasattr(ssl, 'CertificateError'):
    network_exceptions += (ssl.CertificateError,)
2397
2398
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """Build the final error message and remember the error context.

        tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp;
        otherwise bug_reports_message() is appended to the message.
        cause, if given, is mentioned in the message; video_id, if given, is put in front of it.
        """
        # An error created while a network exception is being handled
        # counts as expected as well
        expected = expected or sys.exc_info()[0] in network_exceptions

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.video_id = video_id
        self.cause = cause
        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception

    def format_traceback(self):
        """Return the traceback given as tb formatted as a string, or None if there is none."""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
2426
2427
class UnsupportedError(ExtractorError):
    """Expected error for an unsupported URL, which is kept in the url attribute."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2433
2434
class RegexNotFoundError(ExtractorError):
    """Extraction error for a regular expression that did not match"""
2438
2439
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    May be raised for a video that a website does not make available in the
    user's geographic location. It is always an expected error; the plain
    message and the countries passed in are kept in the msg and countries
    attributes.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg, self.countries = msg, countries
2451
2452
class DownloadError(YoutubeDLError):
    """Download Error exception.

    FileDownloader objects may raise this when they are not configured to
    continue on errors. It carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, describes the exception that caused the trouble (as returned by sys.exc_info()). """
        YoutubeDLError.__init__(self, msg)
        self.exc_info = exc_info
2465
2466
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    Thrown by YoutubeDL when a requested entry
    cannot be found in the playlist info_dict
    """
2474
2475
class SameFileError(YoutubeDLError):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
2483
2484
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this to signal that the
    postprocessing task failed. The message is also available as msg.
    """

    def __init__(self, msg):
        YoutubeDLError.__init__(self, msg)
        self.msg = msg
2495
2496
class ExistingVideoReached(YoutubeDLError):
    """ Presumably raised when an already existing video is reached.

    NOTE(review): the previous docstring was a copy of MaxDownloadsReached's
    ("--max-downloads limit has been reached"); confirm the option that
    triggers this exception.
    """
2500
2501
class RejectedVideoReached(YoutubeDLError):
    """ Presumably raised when a rejected (filtered out) video is reached.

    NOTE(review): the previous docstring was a copy of MaxDownloadsReached's
    ("--max-downloads limit has been reached"); confirm the option that
    triggers this exception.
    """
2505
2506
class ThrottledDownload(YoutubeDLError):
    """ The download speed is below --throttled-rate. """
2510
2511
class MaxDownloadsReached(YoutubeDLError):
    """ The --max-downloads limit has been reached. """
2515
2516
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when a video is requested in a format
    that is not available for that video.
    """
2524
2525
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    FileDownloader objects may raise this when a downloaded file is smaller
    than what the server announced first, which indicates that the
    connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        YoutubeDLError.__init__(self, message)
        # Both in bytes
        self.downloaded, self.expected = downloaded, expected
2541
2542
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an errno code and a message, classified into a reason.

    The reason attribute is 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED',
    derived from the code and, failing that, from well-known message texts.
    """

    def __init__(self, code=None, msg='Unknown error'):
        YoutubeDLError.__init__(self, msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg; the code is checked before the message texts
        if code in (errno.ENOSPC, errno.EDQUOT) or any(
                text in msg for text in ('No space left', 'Disk quota exceeded')):
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
2557
2558
class XAttrUnavailableError(YoutubeDLError):
    """Presumably raised when extended attributes cannot be used at all - TODO confirm against the callers."""
2561
2562
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class, honouring the handler's 'source_address' option.

    @param ydl_handler  handler whose _params dict is read for 'source_address'
    @param http_class   connection class, instantiated with *args and **kwargs
    @param is_https     whether the socket has to be wrapped with SSL; only
                        used by the connect() replacement that is installed
                        when the connection has no source_address attribute
    @returns            the connection object; if a source address is
                        configured, it binds to that address and only
                        connects to remote addresses of the same IP version
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: this parameter shadows the outer source_address. It is
            # indexed below, so it has to be a (host, port) tuple; the None
            # default would fail
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source host selects IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try the remaining addresses in order and return the first
            # socket that connects
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and move on to the next address
                    err = _
                    if sock is not None:
                        sock.close()
            # Nothing connected: re-raise the last failure, if there was one
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 in the (host, port) source address
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: replace connect() altogether
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2626
2627
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker header.

    Without the marker, headers is returned as it is. With it, a new dict is
    returned that contains neither the marker nor any 'Accept-Encoding'
    header (matched case-insensitively); headers itself is not modified.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    remaining = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    remaining.pop('Youtubedl-no-compression')
    return remaining
2636
2637
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # params is kept as self._params, which _create_http_connection()
        # reads; the other arguments go to HTTPHandler
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, going through a SOCKS proxy if the request names one."""
        conn_class = compat_http_client.HTTPConnection

        # The proxy URL is read from the 'Ytdl-socks-proxy' header, which is
        # removed from the request again
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data; empty data is returned as it is."""
        if not data:
            return data
        # Try a raw deflate stream first (negative wbits), then fall back to
        # zlib's default format
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Escape the request URL and add the standard headers.

        Returns the request to use, which is a new object if the URL had to
        be changed.
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Headers already present in the request take precedence over std_headers
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Applies (and removes) the "Youtubedl-no-compression" marker
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress the response body and escape the redirect URL.

        A gzip or deflate body is read completely and replaced by an
        in-memory response without the Content-encoding header. For 3xx
        responses the Location header is percent-encoded if necessary.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1 to 1023 trailing bytes cut off; if none of
                # the attempts works, the first error is raised
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                # The header value is re-interpreted as UTF-8 here
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                # The header is only rewritten if escaping changed something
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS requests and responses get the same treatment
    https_request = http_request
    https_response = http_response
2761
2762
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class that connects through a SOCKS proxy.

    @param base_class   HTTPConnection or HTTPSConnection (sub)class
    @param socks_proxy  proxy URL. Its scheme selects the protocol: socks5,
                        socks4a, or socks4 / socks (both SOCKS4). The port
                        defaults to 1080 and percent-encoded credentials
                        are decoded.
    @returns            a subclass of base_class with a connect() that goes
                        through the proxy
    @raises ValueError  if the proxy URL has any other scheme
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Fail with a clear message; this used to surface as an
        # UnboundLocalError when proxy_args was built
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only a numeric timeout is applied to the proxy socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, TLS is started once the proxied connection is up
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2804
2805
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections through _create_http_connection().

    @param params            kept as self._params, which
                             _create_http_connection() reads
    @param https_conn_class  connection class to use; defaults to
                             HTTPSConnection
    Remaining arguments are passed on to HTTPSHandler.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._params = params
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection

    def https_open(self, req):
        """Open req, going through a SOCKS proxy if the request names one."""
        # Forward the SSL settings that HTTPSHandler stored on this instance:
        # _context exists on python > 2.6, _check_hostname on python 3.x
        kwargs = {}
        for option, attribute in (('context', '_context'), ('check_hostname', '_check_hostname')):
            if hasattr(self, attribute):
                kwargs[option] = getattr(self, attribute)

        conn_class = self._https_conn_class
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **kwargs)
2829
2830
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar that reads and writes cookie files as UTF-8, accepts
    '#HttpOnly_' prefixed entries, skips malformed entries with a warning
    and treats `expires` 0 as the marker of a session cookie.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix of entries that is stripped before the line is parsed
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab separated fields of a valid entry
    _ENTRY_LEN = 7
    # Written at the top of every saved cookie file
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    # Names for the fields of an entry, in file order
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        filename defaults to self.filename; ValueError is raised if neither
        is set. Cookies marked as discard are only written with
        ignore_discard, expired ones only with ignore_expires.
        Note that cookies without `expires` are changed to `expires` 0 in
        the jar itself.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                # One entry per line, fields separated by tabs
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised if neither
        is set. The file is read as UTF-8; invalid entries are skipped with
        a warning on stderr.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            """Return the line without '#HttpOnly_' prefix; raise LoadError if it is not a valid entry."""
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            # An empty expiry is accepted, anything else has to be a number
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # The accepted lines are collected in memory and handed to the
        # parser of the base class
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2947
2948
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor whose HTTPS hooks are wired to the HTTP ones."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # A Python 2 only workaround used to percent-encode non-ASCII
        # Set-Cookie headers at this point (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769); it is disabled,
        # so the response is simply handed to the stock implementation.
        parent = compat_urllib_request.HTTPCookieProcessor
        return parent.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2971
2972
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    Derived from the HTTPRedirectHandler implementation in CPython [1].

    Two things are handled on top of the stock handler:
     - the redirect URL is always unicode under python 2
     - the HTTP response status code 308 Permanent Redirect [2], which
       some sites use [3], is followed as well

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Build the follow-up Request for a redirect response.

        Called by the http_error_30x methods. Returns a new Request when the
        redirect is followed and raises HTTPError when the combination of
        status code and request method must not be redirected.
        """
        method = req.get_method()
        idempotent_redirect = code in (301, 302, 303, 307, 308) and method in ('GET', 'HEAD')
        post_redirect = code in (301, 302, 303) and method == 'POST'
        if not (idempotent_redirect or post_redirect):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # RFC 2616 strictly forbids following a 301 or 302 answer to a POST
        # without user confirmation; practically every client follows it
        # anyway, and so does this handler.

        # Under python 2 the redirect URL may arrive as a byte string;
        # force it to unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Tolerate URIs containing a space. http_error_302() already does a
        # more complete encoding; this is kept for other callers.
        newurl = newurl.replace(' ', '%20')

        # Headers describing the (now dropped) request body are not forwarded
        body_headers = ('content-length', 'content-type')
        forwarded = {}
        for header_name, header_value in req.headers.items():
            if header_name.lower() not in body_headers:
                forwarded[header_name] = header_value
        return compat_urllib_request.Request(
            newurl, headers=forwarded, origin_req_host=req.origin_req_host,
            unverifiable=True)
3028
3029
def extract_timezone(date_str):
    """Split a trailing "Z" or numeric UTC offset off a date string.

    Returns a (timezone, date_str) tuple: the offset as a datetime.timedelta
    (zero for "Z" or when nothing is recognized) and the date string with
    the recognized suffix removed.
    """
    mobj = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if mobj is None:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(mobj.group('tz'))]
    sign = mobj.group('sign')
    if not sign:
        # Only the "Z" designator matched
        return datetime.timedelta(), remainder

    factor = -1 if sign != '+' else 1
    offset = datetime.timedelta(
        hours=factor * int(mobj.group('hours')),
        minutes=factor * int(mobj.group('minutes')))
    return offset, remainder
3046
3047
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    delimiter separates the date from the time part. timezone is the UTC
    offset as a datetime.timedelta; when omitted it is taken from the string
    via extract_timezone(). None is returned for None or unparsable input.
    """
    if date_str is None:
        return None

    # Fractional seconds are not part of the pattern used below
    cleaned = re.sub(r'\.[0-9]+', '', date_str)
    if timezone is None:
        timezone, cleaned = extract_timezone(cleaned)

    try:
        pattern = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_moment = datetime.datetime.strptime(cleaned, pattern) - timezone
        return calendar.timegm(utc_moment.timetuple())
    except ValueError:
        return None
3065
3066
def date_formats(day_first=True):
    """Return the day-first date patterns, or the month-first ones when day_first is false."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3069
3070
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    day_first selects which pattern list from date_formats() is tried.
    None is returned for None or unparsable input.
    """
    if date_str is None:
        return None

    # Commas are turned into spaces
    cleaned = date_str.replace(',', ' ')
    # Drop AM/PM together with an optional alphabetic timezone name
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    # Only the stripped string is needed; the offset itself is discarded
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Every pattern is tried, so a later matching pattern overrides an
    # earlier one
    for pattern in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, pattern).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Last resort: RFC 2822 style dates
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    return None if result is None else compat_str(result)
3097
3098
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    day_first selects which pattern list from date_formats() is tried.
    None is returned when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # NOTE(review): the 12 hour shift is applied whenever "PM" occurs in the
    # string, so a "12:xx PM" input presumably ends up 12 hours late - confirm
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    # Fallback for RFC 2822 style dates. The numeric offset was already cut
    # off by extract_timezone() above, so parsedate_tz() cannot see it; it is
    # applied here the same way as in the loop instead of being dropped.
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return (calendar.timegm(timetuple) + pm_delta * 3600
                - int(timezone.total_seconds()))
3130
3131
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL, returning default_ext when unsure."""
    if url is None or '.' not in url:
        return default_ext

    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # URLs like http://example.com/foo/bar.mp4/?download leave trailing
    # slashes on the candidate; accept it if the rest is a known extension
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3143
3144
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by "<sub_lang>.<sub_format>"."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
3147
3148
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    "yesterday" is accepted as well.

    format: strptime format used to parse a plain date string
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'

    current = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return current
    if date_str == 'yesterday':
        return current - datetime.timedelta(days=1)

    mobj = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if mobj is None:
        # Plain date string
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    # Relative expression: resolve the start first, then apply the offset
    base = datetime_from_str(mobj.group('start'), precision, format)
    amount = int(mobj.group('time'))
    if mobj.group('sign') == '-':
        amount = -amount
    unit = mobj.group('unit')

    if unit in ('month', 'year'):
        # timedelta has no month/year, so these go through the calendar helper
        months = amount * 12 if unit == 'year' else amount
        shifted = datetime_add_months(base, months)
        unit = 'day'
    else:
        if unit == 'week':
            unit, amount = 'day', amount * 7
        shifted = base + datetime.timedelta(**{unit + 's': amount})

    return datetime_round(shifted, unit) if auto_precision else shifted
3189
3190
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: strptime format used to parse a plain date string
    """
    moment = datetime_from_str(date_str, precision='microsecond', format=format)
    return moment.date()
3199
3200
def datetime_add_months(dt, months):
    """Shift a datetime by a (possibly negative) number of months.

    The day is clamped to the last day of the resulting month.
    """
    year_shift, month_index = divmod(dt.month + months - 1, 12)
    target_year = dt.year + year_shift
    target_month = month_index + 1
    days_in_month = calendar.monthrange(target_year, target_month)[1]
    return dt.replace(target_year, target_month, min(dt.day, days_in_month))
3208
3209
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision

    precision is one of microsecond|second|minute|hour|day; "microsecond"
    returns dt unchanged. Rounding works on whole seconds and goes to the
    nearest multiple of the unit, with halves rounded up.
    """
    if precision == 'microsecond':
        return dt

    seconds_per_unit = {
        'second': 1,
        'minute': 60,
        'hour': 3600,
        'day': 86400,
    }
    epoch_seconds = calendar.timegm(dt.timetuple())
    step = seconds_per_unit[precision]
    rounded = ((epoch_seconds + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
3226
3227
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that do not look like YYYYMMDD are returned unchanged."""
    mobj = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if mobj is None:
        return date_str
    return '-'.join(mobj.groups())
3236
3237
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        An omitted bound defaults to the earliest/latest representable date.
        Raises ValueError when start lies after end.
        """
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range

        date may be a datetime.date, a datetime.datetime (only its date part
        is considered) or a string accepted by date_from_str.
        """
        if isinstance(date, datetime.datetime):
            # datetime is a subclass of date, but the two cannot be compared
            # with each other, so reduce it to its date part first
            date = date.date()
        elif not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3267
3268
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Byte strings are decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3277
3278
def _windows_write_string(s, out):
    """ Write s to a Windows console through WriteConsoleW.

    Returns True if the string was written using special methods,
    False if it has yet to be written out (out has no usable file
    descriptor, is not stdout/stderr, or is not attached to a console).
    Raises OSError when WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Prototypes for the kernel32 functions used below
    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A console handle is a character device (ignoring the "remote" flag)
        # for which GetConsoleMode succeeds
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters that stop before the next
    # non-BMP character; such a character is then written on its own
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written as 2 units
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever part of the chunk was actually written
            s = s[written.value:]
    return True
3352
3353
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr by default) and flush it.

    On Windows, when no encoding is given, the console API is tried first.
    Otherwise s is encoded for binary streams (unencodable characters are
    dropped), written through out.buffer when available, or written as is.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    try_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if try_console_api and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        codec = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(codec, 'ignore'))
    else:
        out.write(s)
    out.flush()
3374
3375
def bytes_to_intlist(bs):
    """Return the byte values of bs as a list of ints ([] for empty input)."""
    if not bs:
        return []
    # Indexing yields ints on Python 3 and one-character strings otherwise
    return list(bs) if isinstance(bs[0], int) else [ord(ch) for ch in bs]
3383
3384
def intlist_to_bytes(xs):
    """Pack a sequence of byte values (0-255) into a bytes object."""
    if not xs:
        return b''
    layout = '%dB' % len(xs)
    return compat_struct_pack(layout, *xs)
3389
3390
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on Windows, fcntl.flock elsewhere, and
# stubs raising IOError when fcntl cannot be imported.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # Structure passed to LockFileEx/UnlockFileEx; its offset fields are set
    # to 0 in _lock_file below
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count passed to both calls
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f; raises OSError when LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object because _unlock_file needs the same pointer
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 is LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock on f."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock held on f."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            """Always raises IOError: locking is unavailable here."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always raises IOError: locking is unavailable here."""
            raise IOError(UNSUPPORTED_MSG)
3464
3465
class locked_file(object):
    """Context manager around an open file that is locked while in use.

    Mode 'r' takes a shared lock, 'a' and 'w' take an exclusive one.
    The file is opened on construction and closed when the block exits.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        needs_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, needs_exclusive)
        except IOError:
            # Do not leak the handle when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Close the file even when unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        """Forward to the underlying file's write()."""
        return self.f.write(*args)

    def read(self, *args):
        """Forward to the underlying file's read()."""
        return self.f.read(*args)
3495
3496
def get_filesystem_encoding():
    """Return the filesystem encoding, falling back to 'utf-8' when unknown."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3500
3501
def shell_quote(args):
    """Return args as a single shell-quoted, space-separated command line."""
    fs_encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join([compat_shlex_quote(as_text(arg)) for arg in args])
3511
3512
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded into the URL fragment. Data already smuggled into
    url is kept and takes precedence over equal keys in data. The caller's
    data mapping is left untouched.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy: updating `data` in place would leak previously
    # smuggled values into a dict the caller may reuse for other URLs
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
3521
3522
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (url, data).

    URLs without smuggled data are returned unchanged together with default.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3530
3531
def format_bytes(bytes):
    """Format a byte count with a binary unit suffix, e.g. '1.50KiB'.

    bytes may be a number or a numeric string. 'N/A' is returned for None
    and for negative values. Values beyond the largest unit are expressed
    in that unit.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    if bytes < 0:
        return 'N/A'

    suffixes = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    value = float(bytes)
    exponent = 0
    # Repeated division keeps the unit index within the table for tiny
    # fractions (which stay in 'B') as well as for huge values (which stay
    # in the last unit), and avoids logarithm rounding at exact powers
    while value >= 1024.0 and exponent < len(suffixes) - 1:
        value /= 1024.0
        exponent += 1
    return '%.2f%s' % (value, suffixes[exponent])
3544
3545
def lookup_unit_table(unit_table, s):
    """Parse "<number><unit>" at the start of s using unit_table.

    unit_table maps unit names to multipliers. The number may use either
    ',' or '.' as decimal separator. Returns the scaled value as int, or
    None when s does not start with a number followed by a known unit.
    """
    unit_alternatives = '|'.join(map(re.escape, unit_table))
    mobj = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % unit_alternatives, s)
    if mobj is None:
        return None
    number_text = mobj.group('num').replace(',', '.')
    multiplier = unit_table[mobj.group('unit')]
    return int(float(number_text) * multiplier)
3555
3556
def parse_filesize(s):
    """Parse a human-readable file size such as '1.5 MiB' into a byte count.

    Returns None for None input or when no known unit follows the number.
    """
    if s is None:
        return None

    # Unit name -> number of bytes. The lower-case forms are of course
    # incorrect and unofficial, but we support those too
    unit_multipliers = {
        # bytes
        'B': 1,
        'b': 1,
        'bytes': 1,
        # kilo
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        # mega
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        # giga
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        # tera
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        # peta
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        # exa
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        # zetta
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        # yotta
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(unit_multipliers, s)
3626
3627
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an int.

    Returns None for None input or when the text cannot be interpreted.
    """
    if s is None:
        return None

    text = s.strip()

    # Plain numbers, possibly with thousands separators
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    suffix_multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(suffix_multipliers, text)
3647
3648
def parse_resolution(s):
    """Extract video dimensions from a string.

    Recognizes "<w>x<h>" (both keys), "<h>p"/"<h>i" and "4k"/"8k" (height
    only). Returns a dict with 'width'/'height', empty when nothing matches.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        return {
            'width': int(dimensions.group('w')),
            'height': int(dimensions.group('h')),
        }

    scan_lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    # "4k" -> 2160, "8k" -> 4320
    k_label = re.search(r'\b([48])[kK]\b', s)
    if k_label:
        return {'height': int(k_label.group(1)) * 540}

    return {}
3669
3670
def parse_bitrate(s):
    """Return the number preceding "kbps" in s as int, or None if there is none."""
    if not isinstance(s, compat_str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    return int(mobj.group(1)) if mobj else None
3677
3678
def month_by_name(name, lang='en'):
    """ Return the number of a month by its full name in the given language

    Month names of unknown languages fall back to the English ones. None is
    returned when name is not a month name. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        position = names.index(name)
    except ValueError:
        return None
    return position + 1
3688
3689
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations, or None when the abbreviation is unknown """

    # The abbreviation is the first three letters of the full English name
    abbreviations = [full_name[:3] for full_name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3698
3699
def fix_xml_ampersands(xml_str):
    """Replace all the bare '&' by '&amp;' in XML

    Ampersands that start one of the predefined entities (amp, lt, gt,
    apos, quot) or a numeric character reference are left alone.
    """
    # Numeric references may address any code point up to U+10FFFF, i.e.
    # up to 6 hexadecimal or 7 decimal digits; a shorter limit would
    # double-escape valid references such as '&#39080;' or '&#x1F600;'
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,6};|#[0-9]{,7};)',
        '&amp;',
        xml_str)
3706
3707
def setproctitle(title):
    """Best-effort attempt to set the process name through libc's prctl.

    Silently does nothing when libc.so.6 or prctl is unavailable.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Initializing the buffer from the bytes makes it one byte larger than
    # the title, so the string handed to prctl is always NUL-terminated
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
3732
3733
def remove_start(s, start):
    """Return s without the prefix start; s is returned unchanged (None included) otherwise."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3736
3737
def remove_end(s, end):
    """Return s without the suffix end; s is returned unchanged (None included) otherwise."""
    if s is not None and s.endswith(end):
        # Slice by absolute position: s[:-len(end)] would turn into s[:0]
        # and wipe the whole string when end is empty
        return s[:len(s) - len(end)]
    return s
3740
3741
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s."""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3749
3750
def get_domain(url):
    """Return the host part of url without scheme and leading "www.", or None."""
    mobj = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if mobj is None:
        return None
    return mobj.group('domain')
3754
3755
def url_basename(url):
    """Return the last segment of the URL's path ('' for an empty path)."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
3759
3760
def base_url(url):
    """Return the http(s) URL up to and including the last '/' before any '?', '#' or '&'."""
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3763
3764
def urljoin(base, path):
    """Join path onto base, returning None for unusable input.

    Both arguments may be bytes (decoded as UTF-8). A path that is already
    absolute or protocol-relative is returned as is; otherwise base has to
    be an http(s) or protocol-relative URL.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    # Nothing to join when path carries its own host
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    base_usable = isinstance(base, compat_str) and re.match(
        r'^(?:https?:)?//', base)
    if not base_usable:
        return None
    return compat_urlparse.urljoin(base, path)
3778
3779
class HEADRequest(compat_urllib_request.Request):
    """Request whose get_method() always reports 'HEAD'"""

    def get_method(self):
        return 'HEAD'
3783
3784
class PUTRequest(compat_urllib_request.Request):
    """Request whose get_method() always reports 'PUT'"""

    def get_method(self):
        return 'PUT'
3788
3789
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert `v` to an int, falling back to `default` when that is impossible

    v:         value to convert; None and '' yield `default`
    scale:     the result is floor-divided by this
    default:   returned whenever no integer can be produced
    get_attr:  if set, the attribute of this name is read from `v` first
    invscale:  the result is multiplied by this (before dividing by `scale`)
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    # OverflowError: int() of an infinite float
    except (ValueError, TypeError, OverflowError):
        return default
3802
3803
def str_or_none(v, default=None):
    """Return `v` converted with compat_str, or `default` if `v` is None"""
    if v is None:
        return default
    return compat_str(v)
3806
3807
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned untouched; strings have ",", "." and "+" removed
    before being handed to int_or_none. Anything else yields None.
    """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if not isinstance(int_str, compat_str):
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)
3815
3816
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert `v` to a float scaled by `invscale / scale`

    `default` is returned when `v` is None or the conversion raises
    ValueError or TypeError.
    """
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        result = default
    return result
3824
3825
def bool_or_none(v, default=None):
    """Return `v` only if it is a real bool, otherwise `default`"""
    if isinstance(v, bool):
        return v
    return default
3828
3829
def strip_or_none(v, default=None):
    """Return `v` stripped of surrounding whitespace if it is a string,
    otherwise `default`"""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3832
3833
def url_or_none(url):
    """Return the stripped `url` if it looks like a supported URL, else None

    Accepted are protocol-relative URLs and the http(s), rtmp*/rtmfp, rtsp*,
    mms and ftp(s) schemes spelled out in the pattern below.
    """
    if not (url and isinstance(url, compat_str)):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
3839
3840
def strftime_or_none(timestamp, date_format, default=None):
    """Format `timestamp` with `date_format`, or return `default` on failure

    timestamp:    either a number (unix timestamp, interpreted as UTC) or a
                  string in YYYYMMDD form; any other type yields `default`
    date_format:  strftime format string
    default:      returned when the timestamp cannot be parsed or formatted
    """
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            return default
        return datetime_object.strftime(date_format)
    # utcfromtimestamp() raises OverflowError/OSError for timestamps outside
    # the range the platform supports; those must not escape either
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        return default
3851
3852
def parse_duration(s):
    """Parse a textual duration into a number of seconds

    Three notations are tried in turn:
      1. clock style "[[[DD:]HH:]MM:]SS[.fraction]"
      2. unit style such as "1d 2h 3m 4.5s", optionally prefixed by "P"/"T";
         year, month and week parts are matched but not counted
      3. a lone "<number> hours" or "<number> min(s)/minutes" with decimals
    A trailing "Z" is tolerated in all of them.

    Returns 0 or a float, or None if `s` is not a string or matches none of
    the notations.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # 1. Clock notation
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # 2. Unit notation; only days, hours, minutes and seconds are captured
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # 3. A single, possibly fractional, amount of hours or minutes
            # NOTE(review): [0-9.]+ also admits strings such as "1.2.3" or
            # ".", which float() below rejects with ValueError
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum up whichever components were captured (missing ones are None/empty)
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms is captured with its leading dot, i.e. it is a fraction of a second
        duration += float(ms)
    return duration
3909
3910
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` in front of the real extension of `filename`

    If `expected_real_ext` is given and the real extension differs from it,
    `ext` is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3917
3918
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the real extension of `filename` with `ext`

    If `expected_real_ext` is given and the real extension differs from it,
    `ext` is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3924
3925
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    False is returned when launching the binary raises OSError """
    cmd = [exe] + args
    try:
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
3935
3936
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.
    The combined stdout/stderr of running it with `args` is handed to
    detect_exe_version together with `version_re` and `unrecognized` """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3954
3955
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from the text printed by an executable

    The first group of `version_re` (default: the token after "version") is
    returned; `unrecognized` is returned when the pattern is not found.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    mobj = re.search(pattern, output)
    return mobj.group(1) if mobj else unrecognized
3965
3966
class LazyList(collections.abc.Sequence):
    ''' Immutable list-like view of an iterable that is evaluated on demand

    Items are pulled from the underlying iterator only when they are needed
    and are cached afterwards.
    Note that slices of a LazyList are lists and not LazyList'''

    def __init__(self, iterable):
        self.__iterable = iter(iterable)
        self.__cache = []
        self.__reversed = False

    def __iter__(self):
        if not self.__reversed:
            # Replay what is cached already, then keep pulling (and caching)
            yield from self.__cache
            for item in self.__iterable:
                self.__cache.append(item)
                yield item
            return
        # Iterating in reverse requires the entire iterable to be consumed
        yield from self.exhaust()

    def __exhaust(self):
        # Drain the rest of the iterator into the cache and hand back the cache
        cache = self.__cache
        cache.extend(self.__iterable)
        return cache

    def exhaust(self):
        ''' Evaluate the entire iterable and return a copy of all items,
        honouring the current direction '''
        items = self.__exhaust()
        return items[::-1] if self.__reversed else items[::1]

    @staticmethod
    def __reverse_index(x):
        # Index x counted from the front is index -(x + 1) counted from the back
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(
                    self.__reverse_index(idx.start),
                    self.__reverse_index(idx.stop),
                    -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')

        needs_everything = (
            (start or 0) < 0 or (stop or 0) < 0
            or (start is None and step < 0)
            or (stop is None and step > 0))
        if needs_everything:
            # Slicing relative to the end is only possible once the whole
            # iterable has been consumed.
            # Obviously, never use this with infinite iterables
            return self.__exhaust()[idx]

        # Fetch just enough items to cover the highest index involved
        missing = max(start or 0, stop or 0) - len(self.__cache) + 1
        if missing > 0:
            self.__cache.extend(itertools.islice(self.__iterable, missing))
        return self.__cache[idx]

    def __bool__(self):
        # Only the first underlying item has to exist for the list to be truthy
        probe = -1 if self.__reversed else 0
        try:
            self[probe]
        except IndexError:
            return False
        return True

    def __len__(self):
        return len(self.__exhaust())

    def reverse(self):
        ''' Flip the direction in place and return self '''
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4042
4043
class PagedList:
    """Base class for lists whose entries are fetched page by page

    pagefunc:   callable taking a page number and returning an iterable with
                the entries of that page
    pagesize:   number of entries per page
    use_cache:  whether fetched pages are remembered

    Subclasses must implement _getslice(start, end).
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getpage(self, pagenum):
        """Return the entries of page `pagenum` as a list"""
        page_results = self._cache.get(pagenum)
        # Compare against None rather than truthiness so that a page cached
        # as an empty list is not requested again
        if page_results is None:
            page_results = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list"""
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        # Out-of-range indices give None rather than raising
        return entries[0] if entries else None
4073
4074
class OnDemandPagedList(PagedList):
    """PagedList that requests pages one after another until a page comes back
    short or the requested range has been covered"""

    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            page_first = pagenum * self._pagesize
            page_end = page_first + self._pagesize
            if start >= page_end:
                continue

            # Offsets of the requested range within the current page
            skip = start % self._pagesize if page_first <= start < page_end else 0
            if end is not None and page_first <= end <= page_end:
                stop = ((end - 1) % self._pagesize) + 1
            else:
                stop = None

            entries = self.getpage(pagenum)
            if skip != 0 or stop is not None:
                entries = entries[skip:stop]
            yield from entries

            # A page that is not "full", i.e. holds fewer than pagesize
            # entries, is taken to be the last one, so no further pages
            # need to be queried.
            if len(entries) + skip < self._pagesize:
                break

            # The page was complete, but the requested range ends exactly
            # here, so the next page is of no interest either
            if end == page_end:
                break
4108
4109
class InAdvancePagedList(PagedList):
    """PagedList for which the total number of pages is known beforehand

    pagefunc:   callable taking a page number and returning an iterable with
                the entries of that page
    pagecount:  total number of pages
    pagesize:   number of entries per page

    Page caching is always enabled.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        if end is None:
            end_page = self._pagecount
        else:
            end_page = end // self._pagesize + 1
            # Never ask for pages beyond the known page count
            if self._pagecount is not None:
                end_page = min(end_page, self._pagecount)
        # Entries to drop from the first page / entries still to deliver
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    # This page completes the requested range
                    yield from page_results[:only_more]
                    break
            yield from page_results
4133
4134
def uppercase_escape(s):
    r"""Replace every \UXXXXXXXX escape sequence in `s` with the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def _unescape(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _unescape, s)
4141
4142
def lowercase_escape(s):
    r"""Replace every \uXXXX escape sequence in `s` with the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def _unescape(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _unescape, s)
4149
4150
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2, quote() has to be fed UTF-8 encoded bytes
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4156
4157
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    The host is IDNA-encoded; path, params, query and fragment go through
    escape_rfc3986.
    """
    parts = compat_urllib_parse_urlparse(url)
    replacements = {'netloc': parts.netloc.encode('idna').decode('ascii')}
    for field in ('path', 'params', 'query', 'fragment'):
        replacements[field] = escape_rfc3986(getattr(parts, field))
    return parts._replace(**replacements).geturl()
4168
4169
def read_batch_urls(batch_fd):
    """Read the URLs listed in a batch file object, one per line

    A leading BOM and surrounding whitespace are removed; empty lines and
    lines starting with "#", ";" or "]" are skipped. `batch_fd` is closed
    afterwards.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, maxsplit=1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        urls = []
        for line in fd:
            cleaned = fixup(line)
            if cleaned:
                urls.append(cleaned)
        return urls
4187
4188
def urlencode_postdata(*args, **kargs):
    """URL-encode the given arguments and return the result as ASCII bytes"""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
4191
4192
def update_url_query(url, query):
    """Return `url` with the parameters of `query` merged into its query string

    Parameters in `query` override same-named ones in `url`; a falsy `query`
    leaves `url` untouched.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
4201
4202
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from `req` with some of its parts replaced

    url:      replacement URL (default: the URL of `req`)
    data:     replacement body (default: the body of `req`)
    headers:  headers added on top of those of `req`
    query:    parameters merged into the query string of the URL

    HEAD and PUT requests keep their method; a timeout attribute on `req`
    is carried over.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    payload = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)
    # Keep the HTTP method for the request types that override it
    request_class = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = request_class(
        target_url, data=payload, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4221
4222
def _multipart_encode_impl(data, boundary):
    """Encode `data` as multipart/form-data using the given `boundary`

    Returns a (body_bytes, content_type) tuple. Raises ValueError if the
    boundary occurs inside any of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(b'--' + boundary_bytes + b'\r\n')
        chunks.append(content)

    # Closing delimiter
    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
4243
4244
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a (body_bytes, content_type) tuple. A ValueError is raised if a
    specified boundary clashes with the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-supplied boundary is used as-is; a clash is an error
        return _multipart_encode_impl(data, boundary)

    # Keep drawing random boundaries until one does not occur in the data
    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
4273
4274
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up a key, or the first usable one of several keys, in `d`

    With a list/tuple of keys, the value of the first key that is present
    and not None (and, if `skip_false_values`, not falsy) is returned.
    `default` is returned when nothing is found.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4283
4284
def try_get(src, getter, expected_type=None):
    """Apply one or several getters to `src` and return the first good result

    A getter that raises AttributeError, KeyError, TypeError or IndexError is
    skipped, as is a result that is not an instance of `expected_type` (when
    given). None is returned if no getter succeeds.
    """
    for get in variadic(getter):
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
4294
4295
def merge_dicts(*dicts):
    """Merge dicts, giving precedence to the earlier ones

    None values are ignored. A later value only replaces an earlier one when
    the earlier one is an empty string and the later one a non-empty string.
    """
    merged = {}
    for a_dict in dicts:
        for key, value in a_dict.items():
            if value is None:
                continue
            if key not in merged:
                merged[key] = value
                continue
            current = merged[key]
            if (isinstance(value, compat_str) and value
                    and isinstance(current, compat_str) and not current):
                merged[key] = value
    return merged
4308
4309
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as compat_str, decoding it with `encoding`/`errors`
    if it is not one already"""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4312
4313
# Age limit returned by parse_age_limit for each US rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# Age limit returned by parse_age_limit for each "TV-" rating label
# NOTE(review): TV-PG maps to 0, the same as TV-Y and TV-G - confirm this is intended
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4331
4332
def parse_age_limit(s):
    """Turn an age rating into a numeric age limit

    Accepted are ints from 0 to 21, strings such as "18" or "18+", the keys
    of US_RATINGS and the keys of TV_PARENTAL_GUIDELINES (with "-", "_" or
    nothing after "TV"). Anything else yields None.
    """
    # Exact type check on purpose: bool is a subclass of int
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    age_match = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if age_match:
        return int(age_match.group('age'))

    rating = s.upper()
    if rating in US_RATINGS:
        return US_RATINGS[rating]

    tv_match = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), rating)
    if tv_match:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_match.group(1)]
    return None
4348
4349
def strip_jsonp(code):
    """Strip the JSONP callback wrapper from `code`, leaving only its argument

    Handles an optional "window." prefix, the "cb && cb(...)" form, a trailing
    semicolon and trailing "//" comments. Text that does not look like JSONP
    is returned unchanged.
    """
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
4358
4359
def js_to_json(code, vars={}):
    """Rewrite a JavaScript object/array literal so that it is closer to JSON

    code:  the JavaScript source text
    vars:  a dict of var, val pairs to substitute; the value is inserted
           verbatim in place of a bare token equal to the var

    Returns the rewritten text; every token recognised by the regex at the
    bottom is passed through fix_kv, everything else is left as it is.
    """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for hexadecimal and octal integer literals,
    # each optionally followed by a ":" when used as an object key
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Return the replacement text for one matched token
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        # Comments, runs of "!" and the matched commas are dropped
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Quoted string: strip the quotes and rewrite the escapes that
            # differ between the two notations
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            # Hexadecimal/octal integers are converted to decimal; as object
            # keys (trailing ":") they are additionally quoted
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        # Everything that reaches this point is emitted double-quoted
        return '"%s"' % v

    # Alternatives, in order: double-quoted string, single-quoted string,
    # comment, comma followed by a closing bracket/brace, identifier-like
    # token, hexadecimal/octal integer (optionally as key), decimal integer
    # used as key, run of "!"
    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4404
4405
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its position in `quality_ids`,
    or to -1 if the id is not listed """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
4414
4415
# Built-in output templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types
# NOTE(review): the values look like filename suffixes for that type of file
# (None where there is no fixed one) - confirm against the code using this table
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# Regex template for a %-style format specifier, to be filled in with
# str.format: {0} is the pattern for the mapping key, {1} the pattern for the
# conversion type. An even number of preceding "%" is captured as "prefix".
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?  # mapping key
    (?P<format>
        (?:[#0\-+ ]+)?  # conversion flags (optional)
        (?:\d+)?  # minimum field width (optional)
        (?:\.\d+)?  # precision (optional)
        [hlL]?  # length modifier (optional)
        {1}  # conversion type
    )
'''


# Conversion type characters of %-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4450
4451
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than `length` are cut so that, including the trailing
    "...", they are `length` characters long. None is passed through """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES
4460
4461
def version_tuple(v):
    """Split a version string on "." and "-" into a tuple of ints

    Raises ValueError if any of the parts is not an integer.
    """
    return tuple(map(int, re.split(r'[-.]', v)))
4464
4465
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether `version` is older than `limit`

    When `version` is empty or either version cannot be parsed, the answer
    is the opposite of `assume_new`.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4473
4474
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # The unconditional return makes this function always report False.
    # The zipimporter/frozen detection below it is therefore unreachable.
    return False

    from zipimport import zipimporter

    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4482
4483
def args_to_str(args):
    """Get a short string representation for a subprocess command

    Every argument is shell-quoted and the results are joined with spaces.
    """
    return ' '.join(map(compat_shlex_quote, args))
4487
4488
def error_to_compat_str(err):
    """Return the message of `err` as a text string"""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4496
4497
def mimetype2ext(mt):
    """Map a MIME type to a file extension

    Parameters after ";" are ignored and the type is compared
    case-insensitively. Types without a dedicated mapping yield their
    (lowercased) subtype. None is passed through.
    """
    if mt is None:
        return None

    # Drop parameters first so that neither lookup below is confused by
    # "; charset=..." or by a "/" inside a parameter value
    mime = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mime)
    if ext is not None:
        return ext

    _, _, res = mime.rpartition('/')

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4535
4536
def parse_codecs(codecs_str):
    """Split a comma-separated codecs string into video and audio codec

    Returns a dict with 'vcodec' and 'acodec' ('none' for a missing one), or
    an empty dict if nothing usable was found. When no codec is recognised
    but exactly two are listed, they are taken as video and audio in that
    order. A warning is written to stderr for every unknown codec.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    split_codecs = [
        part for part in map(str.strip, codecs_str.strip().strip(',').split(','))
        if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        # Only the part before the first dot identifies the codec family
        family = full_codec.split('.')[0]
        if family in VIDEO_CODECS:
            vcodec = vcodec or full_codec
        elif family in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4566
4567
def urlhandle_detect_ext(url_handle):
    """Guess the file extension of a response from its headers

    The filename given in Content-Disposition is preferred; otherwise the
    extension is derived from Content-Type.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4580
4581
def encode_data_uri(data, mime_type):
    """Build a base64 "data:" URI carrying the bytes `data` as `mime_type`"""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4584
4585
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    Nothing is blocked when no age limit is set or when the content has no
    limit of its own """
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4594
4595
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to their BOM (UTF-8 if there is none);
    the result is a match object if the text starts with optional whitespace
    followed by "<", and None otherwise """

    # The 4-byte UTF-32 marks must be tried before the 2-byte UTF-16 ones
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, offset = next(
        ((enc, len(bom)) for bom, enc in BOMS if first_bytes.startswith(bom)),
        ('utf-8', 0))
    text = first_bytes[offset:].decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
4614
4615
def determine_protocol(info_dict):
    """Work out the download protocol for a format

    An explicit 'protocol' entry wins. Otherwise the protocol is derived from
    the start of the URL (rtmp, mms, rtsp), from its extension (m3u8, f4m)
    or, failing that, from the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4636
4637
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    header_row:  list with the column titles
    data:        list of rows
    delim:       add a row of dashes between the header and the data
    extraGap:    additional padding added to every column but the last
    hideEmpty:   drop the columns that are empty in all data rows
    """

    def column_widths(rows):
        return [max(len(compat_str(v)) for v in col) for col in zip(*rows)]

    def keep_columns(row, keep):
        return [col for (take, col) in zip(keep, row) if take]

    if hideEmpty:
        # A column whose widest data cell has length 0 is considered empty
        data_widths = column_widths(data)
        header_row = keep_columns(header_row, data_widths)
        data = [keep_columns(row, data_widths) for row in data]

    rows = [header_row] + data
    widths = column_widths(rows)
    if delim:
        rows.insert(1, ['-' * width for width in widths])
    # Every column but the last is left-aligned and padded to its width
    padded_cells = ('%-' + compat_str(width + extraGap) + 's' for width in widths[:-1])
    format_str = ' '.join(padded_cells) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in rows)
4658
4659
def _match_one(filter_part, dct, incomplete):
    """ Evaluate a single filter condition against a dictionary

    Two forms are tried in order:
      * a comparison "key [!]op[?] value", where op is one of the keys of
        COMPARISON_OPERATORS, "!" negates the result and "?" makes the
        condition pass when the field is missing
      * a unary test "key" / "!key"

    @param filter_part  The text of one condition
    @param dct          The dictionary whose fields are tested
    @param incomplete   If true, conditions on missing fields pass
    @returns            A truthy value if the condition passes, else a falsy one
    @raises ValueError  If filter_part matches neither form, if a string-only
                        operator is used with a numeric value, or if a numeric
                        value cannot be parsed
    """
    # TODO: Generalize code with YoutubeDL._build_format_filter
    # Operators that are only meaningful for strings: (attr, value) -> truthy
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    # The insertion order of this dict is the order of the alternatives in
    # the regex built below
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    # The value is tried as a number with optional size suffix (intval), then
    # as a quoted string (quotedstrval), then as a bare string (strval)
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        unnegated_op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and the matching comparison
            # value is a number, we should respect the origin of the original
            # field and process the comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            # String comparison
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison
            if m.group('op') in STRING_OPERATORS:
                raise ValueError('Operator %s only supports string values!' % m.group('op'))
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try it as a file size, first as
                # written and then with a "B" appended; a None result from
                # parse_filesize is treated as failure
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        # The missing-field check comes after the value has been validated,
        # so an invalid condition raises even when the field is absent
        if actual_value is None:
            return incomplete or m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: boolean fields are tested for True/False, any other field
    # for being present (not None) / missing (None)
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if incomplete and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4741
4742
def match_str(filter_str, dct, incomplete=False):
    """ Check a dictionary against a filter expression

    The expression consists of conditions separated by "&"; an "&" preceded
    by a backslash is part of a condition instead of a separator.
    Returns True if every condition passes and False otherwise.
    When incomplete is true, conditions on missing fields pass.
    """
    for raw_condition in re.split(r'(?<!\\)&', filter_str):
        condition = raw_condition.replace(r'\&', '&')
        if not _match_one(condition, dct, incomplete):
            # Stop at the first failing condition
            return False
    return True
4750
4751
def match_filter_func(filter_str):
    """ Build a filter callable from a filter expression

    The returned function takes an info dict (further arguments are handed
    to match_str) and returns None if the dict passes the filter, or a
    message explaining that it is skipped otherwise.
    """
    def _match_func(info_dict, *args, **kwargs):
        if not match_str(filter_str, info_dict, *args, **kwargs):
            # Name the entry by title, falling back to id and then 'video'
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
4760
4761
def parse_dfxp_time_expr(time_expr):
    """ Parse a DFXP/TTML time expression into seconds

    Accepted are an offset in seconds with optional fraction and optional
    trailing "s" ("12.5s"), and a clock time "H:MM:SS" whose seconds may carry
    a fraction introduced by "." or ":".
    Returns a float, or None for empty or unrecognized input.
    """
    if not time_expr:
        return None

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match is not None:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match is None:
        return None
    hours, minutes, seconds = clock_match.groups()
    # A ":" before the fraction is treated like a decimal point
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4773
4774
def srt_subtitles_timecode(seconds):
    """ Format a time offset as an SRT timecode "HH:MM:SS,mmm"

    @param seconds  Time offset in seconds (int or float)
    @returns        The timecode, rounded to the nearest millisecond
    """
    # Convert to whole milliseconds first. Truncating the fractional part of
    # the float instead is off by one for values like 4.35, whose fraction
    # times 1000 evaluates to 349.99... and would be printed as ",349"
    total_msecs = int(round(seconds * 1000))
    total_secs, msecs = divmod(total_msecs, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msecs)
4777
4778
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no paragraph (p) element
    '''
    # Pairs of (current namespace, [legacy namespaces]); the legacy URIs are
    # replaced by the current one in the raw data before it is parsed
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # The only styling attributes that are read from the document
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> {property: value}, parents already merged in
    default_style = {}  # properties taken from the style of body/div

    class TTMLPElementParser(object):
        """ XMLParser target that renders one paragraph as SRT text """
        # NOTE(review): the two lists below are class attributes mutated in
        # place, so they are shared by all parser instances created during
        # one dfxp2srt call. Together with the conditional pop in end(),
        # style state can carry over between paragraphs. This is deliberately
        # left unchanged here, since changing it alters the generated markup.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                # Effective style: defaults, then the referenced style, then
                # the styling attributes set directly on the element
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip what the enclosing element already applied
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened by the matching start() in reverse
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the paragraph and feed it to a fresh target parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve the style definitions. A style that references a parent can
    # only be resolved once the parent is known, so passes over all styles
    # are repeated until no reference is left unresolved.
    while True:
        known_before = len(styles)
        missing_parents = set()
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    missing_parents.add(parent_style_id)
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if not missing_parents:
            break
        if len(styles) == known_before:
            # The pass resolved nothing new, so further passes would repeat
            # forever. This happens if a parent is undefined, defines no
            # supported property or is part of a reference cycle. Such
            # parents contribute nothing: register them as empty so that the
            # styles depending on them resolve in the next pass.
            for parent_style_id in missing_parents:
                styles[parent_style_id] = {}

    # The styles referenced by body and div become defaults for all elements
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # index counts every paragraph, including the ones skipped below
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # Without a usable end, derive it from the duration
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4941
4942
def cli_option(params, command_option, param):
    """ Build the command line arguments of an option that takes a value

    @param params          Dictionary of parameters
    @param command_option  The option to emit, e.g. '--proxy'
    @param param           Key of the option's value in params
    @returns               [command_option, value as string], or [] if the
                           value is missing or None
    """
    value = params.get(param)
    if value is None:
        return []
    # Every value is converted, including falsy ones such as 0 or False,
    # so that the result contains nothing but strings
    return [command_option, str(value)]
4948
4949
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """ Build the command line arguments of an option that takes a boolean

    The value found under param in params selects true_value or false_value.
    With a separator, a single argument "option<separator>value" is
    returned, otherwise the option and the value as two arguments.
    Returns [] if the parameter is missing or None; any other value must be
    a bool.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4958
4959
def cli_valueless_option(params, command_option, param, expected_value=True):
    """ Build the command line arguments of an option without a value

    Returns [command_option] if the value found under param in params equals
    expected_value, and [] otherwise.
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4963
4964
def cli_configuration_args(argdict, keys, default=None, use_compat=True):
    """ Select the extra command line arguments configured for a set of keys

    @param argdict     Dictionary mapping lowercase keys to lists of
                       arguments. A plain list/tuple is accepted for backward
                       compatibility: it is returned as is if use_compat is
                       true and ignored otherwise
    @param keys        List/tuple of entries, tried in order. Each entry is
                       passed through variadic() and its keys are looked up
                       in lowercase; the arguments of all keys found for the
                       first entry with any match are concatenated
    @param default     Value returned if nothing is configured; a new empty
                       list if omitted
    @param use_compat  See argdict
    @returns           A list of arguments, or default
    """
    if default is None:
        # Create the fallback per call: a list in the signature would be
        # shared, so one caller modifying the result would affect later calls
        default = []
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        found = (argdict.get(key.lower()) for key in variadic(key_list))
        arg_list = [args for args in found if args is not None]
        if arg_list:
            return [arg for args in arg_list for arg in args]
    return default
4983
4984
class ISO639Utils(object):
    """Conversion between two-letter and three-letter language codes"""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    # Two-letter code -> three-letter code
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Return the ISO 639-2/T code for an ISO 639-1 code, or None if unknown

        Only the first two characters of code are looked up.
        """
        two_letter_code = code[:2]
        return cls._lang_map.get(two_letter_code)

    @classmethod
    def long2short(cls, code):
        """Return the ISO 639-1 code for an ISO 639-2/T code, or None if unknown

        If several two-letter codes map to the same three-letter code, the
        one listed first in the table is returned.
        """
        matching_short_names = (
            short_name for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matching_short_names, None)
5188
5189
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code"""

    # From http://data.okfn.org/data/core/country-list
    # Uppercase two-letter code -> country name
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Return the full country name for a two-letter country code

        The lookup is case-insensitive; None is returned for unknown codes.
        """
        normalized_code = code.upper()
        return cls._country_map.get(normalized_code)
5448
5449
5450 class GeoUtils(object):
5451     # Major IPv4 address blocks per country
5452     _country_ip_map = {
5453         'AD': '46.172.224.0/19',
5454         'AE': '94.200.0.0/13',
5455         'AF': '149.54.0.0/17',
5456         'AG': '209.59.64.0/18',
5457         'AI': '204.14.248.0/21',
5458         'AL': '46.99.0.0/16',
5459         'AM': '46.70.0.0/15',
5460         'AO': '105.168.0.0/13',
5461         'AP': '182.50.184.0/21',
5462         'AQ': '23.154.160.0/24',
5463         'AR': '181.0.0.0/12',
5464         'AS': '202.70.112.0/20',
5465         'AT': '77.116.0.0/14',
5466         'AU': '1.128.0.0/11',
5467         'AW': '181.41.0.0/18',
5468         'AX': '185.217.4.0/22',
5469         'AZ': '5.197.0.0/16',
5470         'BA': '31.176.128.0/17',
5471         'BB': '65.48.128.0/17',
5472         'BD': '114.130.0.0/16',
5473         'BE': '57.0.0.0/8',
5474         'BF': '102.178.0.0/15',
5475         'BG': '95.42.0.0/15',
5476         'BH': '37.131.0.0/17',
5477         'BI': '154.117.192.0/18',
5478         'BJ': '137.255.0.0/16',
5479         'BL': '185.212.72.0/23',
5480         'BM': '196.12.64.0/18',
5481         'BN': '156.31.0.0/16',
5482         'BO': '161.56.0.0/16',
5483         'BQ': '161.0.80.0/20',
5484         'BR': '191.128.0.0/12',
5485         'BS': '24.51.64.0/18',
5486         'BT': '119.2.96.0/19',
5487         'BW': '168.167.0.0/16',
5488         'BY': '178.120.0.0/13',
5489         'BZ': '179.42.192.0/18',
5490         'CA': '99.224.0.0/11',
5491         'CD': '41.243.0.0/16',
5492         'CF': '197.242.176.0/21',
5493         'CG': '160.113.0.0/16',
5494         'CH': '85.0.0.0/13',
5495         'CI': '102.136.0.0/14',
5496         'CK': '202.65.32.0/19',
5497         'CL': '152.172.0.0/14',
5498         'CM': '102.244.0.0/14',
5499         'CN': '36.128.0.0/10',
5500         'CO': '181.240.0.0/12',
5501         'CR': '201.192.0.0/12',
5502         'CU': '152.206.0.0/15',
5503         'CV': '165.90.96.0/19',
5504         'CW': '190.88.128.0/17',
5505         'CY': '31.153.0.0/16',
5506         'CZ': '88.100.0.0/14',
5507         'DE': '53.0.0.0/8',
5508         'DJ': '197.241.0.0/17',
5509         'DK': '87.48.0.0/12',
5510         'DM': '192.243.48.0/20',
5511         'DO': '152.166.0.0/15',
5512         'DZ': '41.96.0.0/12',
5513         'EC': '186.68.0.0/15',
5514         'EE': '90.190.0.0/15',
5515         'EG': '156.160.0.0/11',
5516         'ER': '196.200.96.0/20',
5517         'ES': '88.0.0.0/11',
5518         'ET': '196.188.0.0/14',
5519         'EU': '2.16.0.0/13',
5520         'FI': '91.152.0.0/13',
5521         'FJ': '144.120.0.0/16',
5522         'FK': '80.73.208.0/21',
5523         'FM': '119.252.112.0/20',
5524         'FO': '88.85.32.0/19',
5525         'FR': '90.0.0.0/9',
5526         'GA': '41.158.0.0/15',
5527         'GB': '25.0.0.0/8',
5528         'GD': '74.122.88.0/21',
5529         'GE': '31.146.0.0/16',
5530         'GF': '161.22.64.0/18',
5531         'GG': '62.68.160.0/19',
5532         'GH': '154.160.0.0/12',
5533         'GI': '95.164.0.0/16',
5534         'GL': '88.83.0.0/19',
5535         'GM': '160.182.0.0/15',
5536         'GN': '197.149.192.0/18',
5537         'GP': '104.250.0.0/19',
5538         'GQ': '105.235.224.0/20',
5539         'GR': '94.64.0.0/13',
5540         'GT': '168.234.0.0/16',
5541         'GU': '168.123.0.0/16',
5542         'GW': '197.214.80.0/20',
5543         'GY': '181.41.64.0/18',
5544         'HK': '113.252.0.0/14',
5545         'HN': '181.210.0.0/16',
5546         'HR': '93.136.0.0/13',
5547         'HT': '148.102.128.0/17',
5548         'HU': '84.0.0.0/14',
5549         'ID': '39.192.0.0/10',
5550         'IE': '87.32.0.0/12',
5551         'IL': '79.176.0.0/13',
5552         'IM': '5.62.80.0/20',
5553         'IN': '117.192.0.0/10',
5554         'IO': '203.83.48.0/21',
5555         'IQ': '37.236.0.0/14',
5556         'IR': '2.176.0.0/12',
5557         'IS': '82.221.0.0/16',
5558         'IT': '79.0.0.0/10',
5559         'JE': '87.244.64.0/18',
5560         'JM': '72.27.0.0/17',
5561         'JO': '176.29.0.0/16',
5562         'JP': '133.0.0.0/8',
5563         'KE': '105.48.0.0/12',
5564         'KG': '158.181.128.0/17',
5565         'KH': '36.37.128.0/17',
5566         'KI': '103.25.140.0/22',
5567         'KM': '197.255.224.0/20',
5568         'KN': '198.167.192.0/19',
5569         'KP': '175.45.176.0/22',
5570         'KR': '175.192.0.0/10',
5571         'KW': '37.36.0.0/14',
5572         'KY': '64.96.0.0/15',
5573         'KZ': '2.72.0.0/13',
5574         'LA': '115.84.64.0/18',
5575         'LB': '178.135.0.0/16',
5576         'LC': '24.92.144.0/20',
5577         'LI': '82.117.0.0/19',
5578         'LK': '112.134.0.0/15',
5579         'LR': '102.183.0.0/16',
5580         'LS': '129.232.0.0/17',
5581         'LT': '78.56.0.0/13',
5582         'LU': '188.42.0.0/16',
5583         'LV': '46.109.0.0/16',
5584         'LY': '41.252.0.0/14',
5585         'MA': '105.128.0.0/11',
5586         'MC': '88.209.64.0/18',
5587         'MD': '37.246.0.0/16',
5588         'ME': '178.175.0.0/17',
5589         'MF': '74.112.232.0/21',
5590         'MG': '154.126.0.0/17',
5591         'MH': '117.103.88.0/21',
5592         'MK': '77.28.0.0/15',
5593         'ML': '154.118.128.0/18',
5594         'MM': '37.111.0.0/17',
5595         'MN': '49.0.128.0/17',
5596         'MO': '60.246.0.0/16',
5597         'MP': '202.88.64.0/20',
5598         'MQ': '109.203.224.0/19',
5599         'MR': '41.188.64.0/18',
5600         'MS': '208.90.112.0/22',
5601         'MT': '46.11.0.0/16',
5602         'MU': '105.16.0.0/12',
5603         'MV': '27.114.128.0/18',
5604         'MW': '102.70.0.0/15',
5605         'MX': '187.192.0.0/11',
5606         'MY': '175.136.0.0/13',
5607         'MZ': '197.218.0.0/15',
5608         'NA': '41.182.0.0/16',
5609         'NC': '101.101.0.0/18',
5610         'NE': '197.214.0.0/18',
5611         'NF': '203.17.240.0/22',
5612         'NG': '105.112.0.0/12',
5613         'NI': '186.76.0.0/15',
5614         'NL': '145.96.0.0/11',
5615         'NO': '84.208.0.0/13',
5616         'NP': '36.252.0.0/15',
5617         'NR': '203.98.224.0/19',
5618         'NU': '49.156.48.0/22',
5619         'NZ': '49.224.0.0/14',
5620         'OM': '5.36.0.0/15',
5621         'PA': '186.72.0.0/15',
5622         'PE': '186.160.0.0/14',
5623         'PF': '123.50.64.0/18',
5624         'PG': '124.240.192.0/19',
5625         'PH': '49.144.0.0/13',
5626         'PK': '39.32.0.0/11',
5627         'PL': '83.0.0.0/11',
5628         'PM': '70.36.0.0/20',
5629         'PR': '66.50.0.0/16',
5630         'PS': '188.161.0.0/16',
5631         'PT': '85.240.0.0/13',
5632         'PW': '202.124.224.0/20',
5633         'PY': '181.120.0.0/14',
5634         'QA': '37.210.0.0/15',
5635         'RE': '102.35.0.0/16',
5636         'RO': '79.112.0.0/13',
5637         'RS': '93.86.0.0/15',
5638         'RU': '5.136.0.0/13',
5639         'RW': '41.186.0.0/16',
5640         'SA': '188.48.0.0/13',
5641         'SB': '202.1.160.0/19',
5642         'SC': '154.192.0.0/11',
5643         'SD': '102.120.0.0/13',
5644         'SE': '78.64.0.0/12',
5645         'SG': '8.128.0.0/10',
5646         'SI': '188.196.0.0/14',
5647         'SK': '78.98.0.0/15',
5648         'SL': '102.143.0.0/17',
5649         'SM': '89.186.32.0/19',
5650         'SN': '41.82.0.0/15',
5651         'SO': '154.115.192.0/18',
5652         'SR': '186.179.128.0/17',
5653         'SS': '105.235.208.0/21',
5654         'ST': '197.159.160.0/19',
5655         'SV': '168.243.0.0/16',
5656         'SX': '190.102.0.0/20',
5657         'SY': '5.0.0.0/16',
5658         'SZ': '41.84.224.0/19',
5659         'TC': '65.255.48.0/20',
5660         'TD': '154.68.128.0/19',
5661         'TG': '196.168.0.0/14',
5662         'TH': '171.96.0.0/13',
5663         'TJ': '85.9.128.0/18',
5664         'TK': '27.96.24.0/21',
5665         'TL': '180.189.160.0/20',
5666         'TM': '95.85.96.0/19',
5667         'TN': '197.0.0.0/11',
5668         'TO': '175.176.144.0/21',
5669         'TR': '78.160.0.0/11',
5670         'TT': '186.44.0.0/15',
5671         'TV': '202.2.96.0/19',
5672         'TW': '120.96.0.0/11',
5673         'TZ': '156.156.0.0/14',
5674         'UA': '37.52.0.0/14',
5675         'UG': '102.80.0.0/13',
5676         'US': '6.0.0.0/8',
5677         'UY': '167.56.0.0/13',
5678         'UZ': '84.54.64.0/18',
5679         'VA': '212.77.0.0/19',
5680         'VC': '207.191.240.0/21',
5681         'VE': '186.88.0.0/13',
5682         'VG': '66.81.192.0/20',
5683         'VI': '146.226.0.0/16',
5684         'VN': '14.160.0.0/11',
5685         'VU': '202.80.32.0/20',
5686         'WF': '117.20.32.0/21',
5687         'WS': '202.4.32.0/19',
5688         'YE': '134.35.0.0/16',
5689         'YT': '41.242.116.0/22',
5690         'ZA': '41.0.0.0/11',
5691         'ZM': '102.144.0.0/13',
5692         'ZW': '102.177.192.0/18',
5693     }
5694
5695     @classmethod
5696     def random_ipv4(cls, code_or_block):
5697         if len(code_or_block) == 2:
5698             block = cls._country_ip_map.get(code_or_block.upper())
5699             if not block:
5700                 return None
5701         else:
5702             block = code_or_block
5703         addr, preflen = block.split('/')
5704         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5705         addr_max = addr_min | (0xffffffff >> int(preflen))
5706         return compat_str(socket.inet_ntoa(
5707             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5708
5709
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets a single request override the proxy.

    A request carrying a `Ytdl-request-proxy` header uses that value as its
    proxy; the header is removed before the request is passed on.
    """

    def __init__(self, proxies=None):
        # Install default `http_open`/`https_open` handlers that use the
        # '__noproxy__' marker; the base class initialiser runs afterwards.
        for scheme in ('http', 'https'):
            default_opener = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, default_opener)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = override

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # yt-dlp's http/https handlers wrap the socket with socks
            # themselves; only record which proxy to use.
            req.add_header('Ytdl-socks-proxy', proxy)
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5733
5734
5735 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5736 # released into Public Domain
5737 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5738
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, the result is
    left-padded with binary zeros so that its length is a multiple of
    blocksize. Values that are not positive yield a single zero byte
    (before padding).
    """
    n = int(n)
    words = []
    while n > 0:
        # Collect 32-bit words, least significant first
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    packed = b''.join(reversed(words))
    # Drop leading zero bytes, but always keep at least one byte
    packed = packed.lstrip(b'\000') or b'\000'
    if blocksize > 0:
        remainder = len(packed) % blocksize
        if remainder:
            packed = b'\000' * (blocksize - remainder) + packed
    return packed
5767
5768
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes so the data splits into whole 32-bit words
    missing = -len(s) % 4
    if missing:
        s = b'\000' * missing + s
    total = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        total = (total << 32) + word
    return total
5784
5785
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt with OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The byte order is reversed before the data is read as one big integer
    reversed_hex = binascii.hexlify(data[::-1])
    plain_number = int(reversed_hex, 16)
    cipher_number = pow(plain_number, exponent, modulus)
    return '%x' % cipher_number
5801
5802
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout `00 02 <padding> 00 <data>`. Every padding
    byte is non-zero, because the first zero byte after the header marks
    where the padding ends and the data starts.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data does not leave room for 11 bytes of overhead
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding bytes must be in 1..255; a zero here would be read as the separator
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5816
5817
def encode_base_n(num, n, table=None):
    """Encode the integer `num` in base `n`.

    `table` supplies the digit symbols; when it is empty or omitted, the
    first `n` characters of 0-9, a-z, A-Z are used.
    Raises ValueError if `n` is larger than the number of symbols.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    symbols = table or FULL_TABLE[:n]

    if n > len(symbols):
        raise ValueError('base %d exceeds table length %d' % (n, len(symbols)))

    if num == 0:
        return symbols[0]

    # Digits come out least significant first
    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(symbols[remainder])
    return ''.join(reversed(digits))
5834
5835
def decode_packed_codes(code):
    """Expand code matched by PACKED_CODES_RE back into its plain form.

    Each word of the obfuscated code is the base-N index of an entry in the
    symbol list; an empty entry means the word stands for itself.
    """
    packed, radix, remaining, words = re.search(PACKED_CODES_RE, code).groups()
    radix = int(radix)
    remaining = int(remaining)
    words = words.split('|')

    lookup = {}
    while remaining:
        remaining -= 1
        token = encode_base_n(remaining, radix)
        lookup[token] = words[remaining] or token

    def _substitute(mobj):
        return lookup[mobj.group(0)]

    return re.sub(r'\b(\w+)\b', _substitute, packed)
5852
5853
def caesar(s, alphabet, shift):
    """Shift every character of `s` found in `alphabet` by `shift` places.

    The shift wraps around the alphabet; characters outside it are kept.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def _rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(_rotate, s))
5861
5862
def rot47(s):
    """Apply a Caesar shift of 47 over the 94 characters '!' through '~'."""
    printable = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable, 47)
5865
5866
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated `KEY=value` attribute list into a dict.

    Values may be double-quoted (and may then contain commas); the
    surrounding quotes are removed.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: (val[1:-1] if val.startswith('"') else val)
        for key, val in pairs
    }
5874
5875
def urshift(val, n):
    """Shift `val` right by `n` bits, mapping negative values into 32-bit unsigned range first."""
    if val < 0:
        val += 0x100000000
    return val >> n
5878
5879
5880 # Based on png2str() written by @gdkchan and improved by @yokrysty
5881 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG image data into rows of raw channel values.

    @param png_data  bytes of a complete PNG file
    @returns         (width, height, pixels) where pixels is a list of
                     `height` rows, each a flat list of `width * 3` values
    @raises IOError  if the signature/IHDR check fails or there is no IDAT data

    NOTE(review): the row stride is fixed at 3 bytes per pixel and the
    bit depth, colour type and interlace fields of IHDR are never read,
    so this presumably only handles 8-bit non-interlaced RGB - confirm.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # 8-byte PNG signature, then the first chunk must be IHDR
    # (4 bytes of length followed by the 4-byte chunk type)
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned formats, selected by the number of bytes given
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk list: length (4), type (4), data (length), CRC (4)
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The IHDR check above guarantees this is the IHDR chunk
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The compressed image stream is the concatenation of all IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Bytes per row, not counting the leading filter-type byte
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Map a flat byte index onto the rows reconstructed so far
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each row is prefixed with one filter-type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before it is filled so _get_pixel can read values
        # already reconstructed in this same row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbours: same channel of the previous pixel (3 bytes back)
            # and the same position in the previous row
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Filter type 0 (None) falls through and leaves color unchanged
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # Upper-left neighbour
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Predict with whichever neighbour is closest to p,
                # preferring a, then b, then c on ties
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5985
5986
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file `path` to `value`.

    `value` is handled as bytes: it is written in binary mode on Windows
    and decoded as UTF-8 before being passed to a command-line tool.

    Backends are tried in this order: the Python `xattr` module (pyxattr
    or xattr), NTFS Alternate Data Streams on Windows, then the `setfattr`
    or `xattr` executables.

    Raises XAttrMetadataError if the chosen backend fails to write, and
    XAttrUnavailableError if no usable backend is found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Two different packages install a module called `xattr`;
        # only pyxattr exposes `set`
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): despite the message, nothing here falls back;
                # only ImportError is caught below, so this presumably
                # propagates to the caller - confirm.
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No Python xattr module available; use platform facilities instead
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            # A stream is addressed as "<file path>:<stream name>"
            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The value is passed as a command-line argument, so it
                # has to be text
                value = value.decode('utf-8')
                # setfattr is preferred when both tools are present
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                # A non-zero exit status is reported with the tool's stderr
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6069
6070
def random_birthday(year_field, month_field, day_field):
    """Pick a random date between 1950-01-01 and 1995-12-31 (inclusive).

    Returns a dict that maps the three given field names to the year,
    month and day of that date, each as a string.
    """
    earliest = datetime.date(1950, 1, 1)
    span_days = (datetime.date(1995, 12, 31) - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    field_names = (year_field, month_field, day_field)
    components = (birthday.year, birthday.month, birthday.day)
    return dict(zip(field_names, map(str, components)))
6081
6082
# Templates for internet shortcut files, which are plain text files.
# Each one is a %-format string with named placeholders. The `.lstrip()`
# removes the newline that follows the opening triple quote, so the
# content starts on the first line of the file.

# Presumably for Windows `.url` shortcuts (judging by the name).
# Placeholder: url
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# Property-list XML; presumably for macOS `.webloc` files (judging by the name).
# Placeholder: url
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# Desktop entry of type Link; presumably for `.desktop` files (judging by the name).
# Placeholders: filename, url
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6108
6109
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    Existing percent-escapes are kept as they are (`%3C` does not become `%253C`); only characters that are not yet escaped get percent-encoded, based on their UTF-8 encoding.

    Raises ValueError for IPv6 hosts, which are not supported.
    """

    parts = compat_urllib_parse_urlparse(iri)

    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
    if '[' in parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # Characters listed in a `safe` value are never percent-encoded; beyond those, only letters, digits and '_.-' are left alone.
    # Source for the `safe` values: https://url.spec.whatwg.org/#percent-encoded-bytes.
    userinfo_safe = r"!$%&'()*+,~"
    path_safe = r"!$%&'()*+,/:;=@|~"
    # Unsure about this one, since this is a legacy way of handling parameters.
    params_safe = r"!$%&'()*+,/:;=@|~"
    # Not totally sure about this one, since the source does not explicitly mention the query URI component.
    query_safe = r"!$%&'()*+,/:;=?@{|}~"
    fragment_safe = r"!#$%&'()*+,/:;=?@{|}~"

    netloc_pieces = []
    if parts.username:
        netloc_pieces.append(compat_urllib_parse_quote(parts.username, safe=userinfo_safe))
        if parts.password is not None:
            netloc_pieces.append(':' + compat_urllib_parse_quote(parts.password, safe=userinfo_safe))
        netloc_pieces.append('@')

    # Punycode for Unicode hostnames; the 'idna' encoding produces ASCII text.
    netloc_pieces.append(parts.hostname.encode('idna').decode('utf-8'))
    port = parts.port
    if port is not None and port != 80:
        netloc_pieces.append(':' + str(port))

    return compat_urllib_parse_urlunparse((
        parts.scheme,
        ''.join(netloc_pieces),
        compat_urllib_parse_quote_plus(parts.path, safe=path_safe),
        compat_urllib_parse_quote_plus(parts.params, safe=params_safe),
        compat_urllib_parse_quote_plus(parts.query, safe=query_safe),
        compat_urllib_parse_quote_plus(parts.fragment, safe=fragment_safe),
    ))
6152
6153
def to_high_limit_path(path):
    """Return `path` in a form that avoids the MAX_PATH limit on Windows.

    On win32/cygwin the absolute path is prefixed with `\\\\?\\`; on other
    platforms the path is returned unchanged. The maximum allowed length
    for the individual path segments may still be quite limited.
    """
    if sys.platform not in ('win32', 'cygwin'):
        return path
    return '\\\\?\\' + os.path.abspath(path)
6160
6161
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Format one value through `template`, or return `default`.

    The value is `obj[field]` when `field` is given and `obj` itself
    otherwise. `default` is returned when the value is in `ignore`, either
    as found or after `func` has been applied to it.
    """
    if field is not None:
        val = obj.get(field, default)
    elif obj is not None:
        val = obj
    else:
        val = default

    if val in ignore:
        return default
    if func:
        val = func(val)
        if val in ignore:
            return default
    return template % val
6170
6171
def clean_podcast_url(url):
    """Remove the podcast tracking/analytics prefixes matched below from `url`."""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
6187
6188
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random version-4 UUID as a lower-case, hyphenated string.

    In the template, `x` is replaced by any hex digit, while `y` is
    restricted to 8, 9, a or b so that the two most significant bits of
    that nibble are `10`, the variant required by RFC 4122.
    """
    def _random_digit(mobj):
        if mobj.group(0) == 'y':
            # Indices 8..11 of the hex table are '8', '9', 'a', 'b'
            return _HEX_TABLE[random.randint(8, 11)]
        return _HEX_TABLE[random.randint(0, 15)]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6194
6195
def make_dir(path, to_screen=None):
    """Create the directory that is to contain `path`, if it is missing.

    @param path         File path; only its directory part is created
    @param to_screen    Optional callable that is given an error message
                        when the directory cannot be created
    @returns            True if the directory exists afterwards (or `path`
                        has no directory part), False if creation failed
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # `callable()` returns a bool, so comparing its result with None
        # was always true and the default `to_screen=None` got called
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6206
6207
def get_executable_path():
    """Return the absolute path of the directory the program runs from.

    This is the directory of `sys.executable` for a frozen (PyInstaller)
    build, two levels above this file when loaded from a ZIP, and one
    level above this file otherwise.
    """
    from zipimport import zipimporter

    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        return os.path.abspath(os.path.dirname(sys.executable))

    # Running from ZIP needs one more level up than a plain source tree
    from_zip = isinstance(globals().get('__loader__'), zipimporter)
    levels_up = '../..' if from_zip else '..'
    return os.path.abspath(os.path.join(os.path.dirname(__file__), levels_up))
6217
6218
def load_plugins(name, suffix, namespace):
    """Load plugin classes from the `ytdlp_plugins` directory.

    The module `name` is looked up in `ytdlp_plugins` under the executable
    path. Each of its attributes whose name ends with `suffix` and is not
    yet in `namespace` is added to `namespace` and to the returned list.
    An ImportError yields whatever was collected so far.
    """
    module_info = [None]
    classes = []
    try:
        search_dir = os.path.join(get_executable_path(), 'ytdlp_plugins')
        module_info = imp.find_module(name, [search_dir])
        plugins = imp.load_module(name, *module_info)
        for attr_name in dir(plugins):
            if attr_name in namespace or not attr_name.endswith(suffix):
                continue
            klass = getattr(plugins, attr_name)
            classes.append(klass)
            namespace[attr_name] = klass
    except ImportError:
        pass
    finally:
        # The first item returned by find_module is an open file (or None)
        opened_file = module_info[0]
        if opened_file is not None:
            opened_file.close()
    return classes
6240
6241
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    @returns                The value found by the first path that yields one, else
                            `default`. A path that branches (tuple key or "...") gives
                            a list of the non-None results, or only its first item
                            when get_all is False
    # TODO: Write tests
    '''
    if not casesense:
        # Lower-case the string keys of every path once, up front
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` belongs to the enclosing call and records the deepest level
        # of branching reached while following the current path
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # Look up every alternative key, then branch over the results
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Branch: apply the rest of the path to each contained item
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Dictionary lookup; when case-insensitive and there is no
                # exact match, compare against the lower-cased dict keys
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # User keys are strings: "N" is an index, "A:B[:C]" a slice
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # A bare ":" behaves like "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Normalise expected_type into a function that returns the value if it
    # is acceptable and None otherwise
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        # Reset for each path; updated by _traverse_obj through `nonlocal`
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Each branching level added one level of list nesting;
                # flatten all but the outermost one, dropping None entries
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6324
6325
def traverse_dict(dictn, keys, casesense=True):
    ''' For backward compatibility. Do not use '''
    # Same as traverse_obj with a single path, treating the keys as user
    # input and allowing traversal into strings
    options = {
        'casesense': casesense,
        'is_user_input': True,
        'traverse_string': True,
    }
    return traverse_obj(dictn, keys, **options)
6330
6331
def variadic(x, allowed_types=(str, bytes)):
    """Return `x` unchanged if it is an iterable, else wrap it in a 1-tuple.

    Instances of `allowed_types` (str and bytes by default) are wrapped as
    well, even though they are iterable.
    """
    if isinstance(x, allowed_types) or not isinstance(x, collections.abc.Iterable):
        return (x,)
    return x