4 from __future__
import unicode_literals
37 import xml
.etree
.ElementTree
41 compat_HTMLParseError
,
47 compat_ctypes_WINFUNCTYPE
,
48 compat_etree_fromstring
,
51 compat_html_entities_html5
,
64 compat_urllib_parse_urlencode
,
65 compat_urllib_parse_urlparse
,
66 compat_urllib_parse_urlunparse
,
67 compat_urllib_parse_quote
,
68 compat_urllib_parse_quote_plus
,
69 compat_urllib_parse_unquote_plus
,
70 compat_urllib_request
,
def register_socks_protocols():
    """Teach urlparse that SOCKS schemes carry a network location.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly, so the SOCKS schemes
    are registered here before any proxy URL gets parsed.
    """
    for proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proxy_scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(proxy_scheme)
# Type of a compiled regular expression object.  The stdlib does not expose
# it portably across all supported Python versions, so derive it from an
# instance.
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
94 def random_user_agent():
95 _USER_AGENT_TPL
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
1674 return _USER_AGENT_TPL
% random
.choice(_CHROME_VERSIONS
)
1678 'User-Agent': random_user_agent(),
1679 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1680 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1681 'Accept-Encoding': 'gzip, deflate',
1682 'Accept-Language': 'en-us,en;q=0.5',
1687 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
# Unique sentinel meaning "caller supplied no default".  A bare object() is
# used so that any real value, including None, can be distinguished from it
# (see the xpath_* helpers, which compare with `default is not NO_DEFAULT`).
NO_DEFAULT = object()
# English month names in calendar order (index 0 = January); used for
# locale-aware date parsing.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1698 'en': ENGLISH_MONTH_NAMES
,
1700 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1701 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1704 KNOWN_EXTENSIONS
= (
1705 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1706 'flv', 'f4v', 'f4a', 'f4b',
1707 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1708 'mkv', 'mka', 'mk3d',
1711 'asf', 'wmv', 'wma',
1717 'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII transliteration (single char or
# multi-char string such as 'AE'/'ss'); zip pairs the key string with the
# flattened chain of replacements one-to-one.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1745 '%Y/%m/%d %H:%M:%S',
1747 '%Y-%m-%d %H:%M:%S',
1748 '%Y-%m-%d %H:%M:%S.%f',
1749 '%Y-%m-%d %H:%M:%S:%f',
1752 '%Y-%m-%dT%H:%M:%SZ',
1753 '%Y-%m-%dT%H:%M:%S.%fZ',
1754 '%Y-%m-%dT%H:%M:%S.%f0Z',
1755 '%Y-%m-%dT%H:%M:%S',
1756 '%Y-%m-%dT%H:%M:%S.%f',
1758 '%b %d %Y at %H:%M',
1759 '%b %d %Y at %H:%M:%S',
1760 '%B %d %Y at %H:%M',
1761 '%B %d %Y at %H:%M:%S',
1764 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
1765 DATE_FORMATS_DAY_FIRST
.extend([
1771 '%d/%m/%Y %H:%M:%S',
1774 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
1775 DATE_FORMATS_MONTH_FIRST
.extend([
1780 '%m/%d/%Y %H:%M:%S',
1783 PACKED_CODES_RE
= r
"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
1784 JSON_LD_RE
= r
'(?is)<script[^>]+type=(["\']?
)application
/ld\
+json\
1[^
>]*>(?P
<json_ld
>.+?
)</script
>'
1787 def preferredencoding():
1788 """Get preferred encoding.
1790 Returns the best encoding scheme for the system, based on
1791 locale.getpreferredencoding() and some further tweaks.
1794 pref = locale.getpreferredencoding()
1802 def write_json_file(obj, fn):
1803 """ Encode obj as JSON and write it to fn, atomically if possible """
1805 fn = encodeFilename(fn)
1806 if sys.version_info < (3, 0) and sys.platform != 'win32
':
1807 encoding = get_filesystem_encoding()
1808 # os.path.basename returns a bytes object, but NamedTemporaryFile
1809 # will fail if the filename contains non ascii characters unless we
1810 # use a unicode object
1811 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1812 # the same for os.path.dirname
1813 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1815 path_basename = os.path.basename
1816 path_dirname = os.path.dirname
1820 'prefix
': path_basename(fn) + '.',
1821 'dir': path_dirname(fn),
1825 # In Python 2.x, json.dump expects a bytestream.
1826 # In Python 3.x, it writes to a character stream
1827 if sys.version_info < (3, 0):
1832 'encoding
': 'utf
-8',
1835 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1839 json.dump(obj, tf, default=repr)
1840 if sys.platform == 'win32
':
1841 # Need to remove existing file on Windows, else os.rename raises
1842 # WindowsError or FileExistsError.
1850 os.chmod(tf.name, 0o666 & ~mask)
1853 os.rename(tf.name, fn)
1862 if sys.version_info >= (2, 7):
1863 def find_xpath_attr(node, xpath, key, val=None):
1864 """ Find the xpath xpath[@key=val] """
1865 assert re.match(r'^
[a
-zA
-Z_
-]+$
', key)
1866 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1867 return node.find(expr)
1869 def find_xpath_attr(node, xpath, key, val=None):
1870 for f in node.findall(compat_xpath(xpath)):
1871 if key not in f.attrib:
1873 if val is None or f.attrib.get(key) == val:
1877 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1878 # the namespace parameter
1881 def xpath_with_ns(path
, ns_map
):
1882 components
= [c
.split(':') for c
in path
.split('/')]
1884 for c
in components
:
1886 replaced
.append(c
[0])
1889 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
1890 return '/'.join(replaced
)
1893 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1894 def _find_xpath(xpath
):
1895 return node
.find(compat_xpath(xpath
))
1897 if isinstance(xpath
, (str, compat_str
)):
1898 n
= _find_xpath(xpath
)
1906 if default
is not NO_DEFAULT
:
1909 name
= xpath
if name
is None else name
1910 raise ExtractorError('Could not find XML element %s' % name
)
1916 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1917 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
1918 if n
is None or n
== default
:
1921 if default
is not NO_DEFAULT
:
1924 name
= xpath
if name
is None else name
1925 raise ExtractorError('Could not find XML element\'s text %s' % name
)
1931 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1932 n
= find_xpath_attr(node
, xpath
, key
)
1934 if default
is not NO_DEFAULT
:
1937 name
= '%s[@%s]' % (xpath
, key
) if name
is None else name
1938 raise ExtractorError('Could not find XML attribute %s' % name
)
1941 return n
.attrib
[key
]
def get_element_by_id(id, html):
    """Return the inner content of the tag carrying the given ID attribute.

    Thin wrapper over the generic attribute-based lookup; returns None when
    no such tag exists in the HTML document.
    """
    return get_element_by_attribute('id', id, html)
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class.

    Returns None when no tag in the HTML document matches.
    """
    matches = get_elements_by_class(class_name, html)
    if matches:
        return matches[0]
    return None
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute equals value.

    When escape_value is True the value is regex-escaped before matching;
    returns None when nothing matches.
    """
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if matches:
        return matches[0]
    return None
def get_elements_by_class(class_name, html):
    """Return the contents of all tags with the specified class as a list."""
    # Match class_name as a whole word anywhere inside the (possibly
    # multi-valued) class attribute.  The pattern is already a regex, so
    # tell the attribute matcher not to escape it again.
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1967 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1968 """Return the content of the tag with the specified attribute in the passed HTML document"""
1970 value = re.escape(value) if escape_value else value
1973 for m in re.finditer(r'''(?xs)
1975 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
1977 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*?
1981 ''' % (re.escape(attribute), value), html):
1982 res = m.group('content
')
1984 if res.startswith('"') or res.startswith("'"):
1987 retlist.append(unescapeHTML(res))
1992 class HTMLAttributeParser(compat_HTMLParser):
1993 """Trivial HTML parser to gather the attributes for a single element"""
1997 compat_HTMLParser.__init__(self)
1999 def handle_starttag(self, tag, attrs):
2000 self.attrs = dict(attrs)
2003 def extract_attributes(html_element):
2004 """Given a string for an HTML element such as
2006 a="foo" B="bar" c="&98;az" d=boz
2007 empty= noval entity="&"
2010 Decode and return a dictionary of attributes.
2012 'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
',
2013 'empty
': '', 'noval
': None, 'entity
': '&',
2014 'sq
': '"', 'dq': '\''
2016 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2017 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2019 parser = HTMLAttributeParser()
2021 parser.feed(html_element)
2023 # Older Python may throw HTMLParseError in case of malformed HTML
2024 except compat_HTMLParseError:
2029 def clean_html(html):
2030 """Clean an HTML snippet into a readable string"""
2032 if html is None: # Convenience for sanitizing descriptions etc.
2036 html = html.replace('\n', ' ')
2037 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2038 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2040 html = re.sub('<.*?>', '', html)
2041 # Replace html entities
2042 html = unescapeHTML(html)
2046 def sanitize_open(filename, open_mode):
2047 """Try to open the given filename, and slightly tweak it if this fails.
2049 Attempts to open the given filename. If this fails, it tries to change
2050 the filename slightly, step by step, until it's either able to open it
2051 or it fails and raises a final exception, like the standard open()
2054 It returns the tuple (stream, definitive_file_name).
2058 if sys.platform == 'win32':
2060 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2061 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2062 stream = open(encodeFilename(filename), open_mode)
2063 return (stream, filename)
2064 except (IOError, OSError) as err:
2065 if err.errno in (errno.EACCES,):
2068 # In case of error, try to remove win32 forbidden chars
2069 alt_filename = sanitize_path(filename)
2070 if alt_filename == filename:
2073 # An exception here should be caught in the caller
2074 stream = open(encodeFilename(alt_filename), open_mode)
2075 return (stream, alt_filename)
2078 def timeconvert(timestr):
2079 """Convert RFC 2822 defined time string into system timestamp"""
2081 timetuple = email.utils.parsedate_tz(timestr)
2082 if timetuple is not None:
2083 timestamp = email.utils.mktime_tz(timetuple)
2087 def sanitize_filename(s, restricted=False, is_id=False):
2088 """Sanitizes a string so it could be used as part of a filename.
2089 If restricted is set, use a stricter subset of allowed characters.
2090 Set is_id if this is not an arbitrary string, but an ID that should be kept
2093 def replace_insane(char):
2094 if restricted and char in ACCENT_CHARS:
2095 return ACCENT_CHARS[char]
2096 if char == '?' or ord(char) < 32 or ord(char) == 127:
2099 return '' if restricted else '\''
2101 return '_
-' if restricted else ' -'
2102 elif char in '\\/|
*<>':
2104 if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()):
2106 if restricted
and ord(char
) > 127:
2113 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
2114 result
= ''.join(map(replace_insane
, s
))
2116 while '__' in result
:
2117 result
= result
.replace('__', '_')
2118 result
= result
.strip('_')
2119 # Common case of "Foreign band name - English song title"
2120 if restricted
and result
.startswith('-_'):
2122 if result
.startswith('-'):
2123 result
= '_' + result
[len('-'):]
2124 result
= result
.lstrip('.')
2130 def sanitize_path(s
, force
=False):
2131 """Sanitizes and normalizes path on Windows"""
2132 if sys
.platform
== 'win32':
2134 drive_or_unc
, _
= os
.path
.splitdrive(s
)
2135 if sys
.version_info
< (2, 7) and not drive_or_unc
:
2136 drive_or_unc
, _
= os
.path
.splitunc(s
)
2142 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
2146 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
2147 for path_part
in norm_path
]
2149 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
2150 elif force
and s
[0] == os
.path
.sep
:
2151 sanitized_path
.insert(0, os
.path
.sep
)
2152 return os
.path
.join(*sanitized_path
)
2155 def sanitize_url(url
):
2156 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2157 # the number of unwanted failures due to missing protocol
2158 if url
.startswith('//'):
2159 return 'http:%s' % url
2160 # Fix some common typos seen so far
2162 # https://github.com/ytdl-org/youtube-dl/issues/15649
2163 (r
'^httpss://', r
'https://'),
2164 # https://bx1.be/lives/direct-tv/
2165 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
2167 for mistake
, fixup
in COMMON_TYPOS
:
2168 if re
.match(mistake
, url
):
2169 return re
.sub(mistake
, fixup
, url
)
2173 def extract_basic_auth(url
):
2174 parts
= compat_urlparse
.urlsplit(url
)
2175 if parts
.username
is None:
2177 url
= compat_urlparse
.urlunsplit(parts
._replace
(netloc
=(
2178 parts
.hostname
if parts
.port
is None
2179 else '%s:%d' % (parts
.hostname
, parts
.port
))))
2180 auth_payload
= base64
.b64encode(
2181 ('%s:%s' % (parts
.username
, parts
.password
or '')).encode('utf-8'))
2182 return url
, 'Basic ' + auth_payload
.decode('utf-8')
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request for url after sanitizing and escaping it.

    Credentials embedded in the URL (user:pass@host) are stripped out and
    re-injected as a Basic Authorization header, placed into the positional
    headers argument when one was given, otherwise into kwargs['headers'].
    """
    clean_url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2194 """Expand shell variables and ~"""
2195 return os
.path
.expandvars(compat_expanduser(s
))
2198 def orderedSet(iterable
):
2199 """ Remove all duplicates from the input iterable """
2207 def _htmlentity_transform(entity_with_semicolon
):
2208 """Transforms an HTML entity to a character."""
2209 entity
= entity_with_semicolon
[:-1]
2211 # Known non-numeric HTML entity
2212 if entity
in compat_html_entities
.name2codepoint
:
2213 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
2215 # TODO: HTML5 allows entities without a semicolon. For example,
2216 # 'Éric' should be decoded as 'Éric'.
2217 if entity_with_semicolon
in compat_html_entities_html5
:
2218 return compat_html_entities_html5
[entity_with_semicolon
]
2220 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
2221 if mobj
is not None:
2222 numstr
= mobj
.group(1)
2223 if numstr
.startswith('x'):
2225 numstr
= '0%s' % numstr
2228 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2230 return compat_chr(int(numstr
, base
))
2234 # Unknown entity in name, return its literal representation
2235 return '&%s;' % entity
2238 def unescapeHTML(s
):
2241 assert type(s
) == compat_str
2244 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
2247 def escapeHTML(text
):
2250 .replace('&', '&')
2251 .replace('<', '<')
2252 .replace('>', '>')
2253 .replace('"', '"')
2254 .replace("'", ''')
2258 def process_communicate_or_kill(p
, *args
, **kwargs
):
2260 return p
.communicate(*args
, **kwargs
)
2261 except BaseException
: # Including KeyboardInterrupt
2267 def get_subprocess_encoding():
2268 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2269 # For subprocess calls, encode with locale encoding
2270 # Refer to http://stackoverflow.com/a/9951851/35070
2271 encoding
= preferredencoding()
2273 encoding
= sys
.getfilesystemencoding()
2274 if encoding
is None:
2279 def encodeFilename(s
, for_subprocess
=False):
2281 @param s The name of the file
2284 assert type(s
) == compat_str
2286 # Python 3 has a Unicode API
2287 if sys
.version_info
>= (3, 0):
2290 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2291 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2292 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2293 if not for_subprocess
and sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2296 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2297 if sys
.platform
.startswith('java'):
2300 return s
.encode(get_subprocess_encoding(), 'ignore')
2303 def decodeFilename(b
, for_subprocess
=False):
2305 if sys
.version_info
>= (3, 0):
2308 if not isinstance(b
, bytes):
2311 return b
.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a command-line argument for subprocess use.

    Legacy callers may still pass byte strings; those are decoded as ASCII
    first, then the text is run through encodeFilename() in subprocess mode.
    """
    # TODO: assert isinstance(s, compat_str) once all post processors are fixed
    arg = s if isinstance(s, compat_str) else s.decode('ascii')
    return encodeFilename(arg, True)
def decodeArgument(b):
    """Decode a subprocess command-line value back into text (see decodeFilename)."""
    return decodeFilename(b, True)
2327 def decodeOption(optval
):
2330 if isinstance(optval
, bytes):
2331 optval
= optval
.decode(preferredencoding())
2333 assert isinstance(optval
, compat_str
)
2337 def formatSeconds(secs
, delim
=':', msec
=False):
2339 ret
= '%d%s%02d%s%02d' % (secs
// 3600, delim
, (secs
% 3600) // 60, delim
, secs
% 60)
2341 ret
= '%d%s%02d' % (secs
// 60, delim
, secs
% 60)
2344 return '%s.%03d' % (ret
, secs
% 1) if msec
else ret
2347 def make_HTTPS_handler(params
, **kwargs
):
2348 opts_no_check_certificate
= params
.get('nocheckcertificate', False)
2349 if hasattr(ssl
, 'create_default_context'): # Python >= 3.4 or 2.7.9
2350 context
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
)
2351 if opts_no_check_certificate
:
2352 context
.check_hostname
= False
2353 context
.verify_mode
= ssl
.CERT_NONE
2355 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2358 # (create_default_context present but HTTPSHandler has no context=)
2361 if sys
.version_info
< (3, 2):
2362 return YoutubeDLHTTPSHandler(params
, **kwargs
)
2363 else: # Python < 3.4
2364 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
)
2365 context
.verify_mode
= (ssl
.CERT_NONE
2366 if opts_no_check_certificate
2367 else ssl
.CERT_REQUIRED
)
2368 context
.set_default_verify_paths()
2369 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2372 def bug_reports_message(before
=';'):
2373 if ytdl_is_updateable():
2374 update_cmd
= 'type yt-dlp -U to update'
2376 update_cmd
= 'see https://github.com/yt-dlp/yt-dlp on how to update'
2377 msg
= 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
2378 msg
+= ' Make sure you are using the latest version; %s.' % update_cmd
2379 msg
+= ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
2381 before
= before
.rstrip()
2382 if not before
or before
.endswith(('.', '!', '?')):
2383 msg
= msg
[0].title() + msg
[1:]
2385 return (before
+ ' ' if before
else '') + msg
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    The error classes defined in this module derive from it, so callers can
    catch the whole family with a single except clause.
    """
# Exception classes that represent network-level failures, collected into a
# tuple so they can be used directly in except clauses and membership tests.
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
# ssl.CertificateError is not available in every supported Python build
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2399 class ExtractorError(YoutubeDLError
):
2400 """Error during info extraction."""
2402 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None):
2403 """ tb, if given, is the original traceback (so that it can be printed out).
2404 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
2407 if sys
.exc_info()[0] in network_exceptions
:
2409 if video_id
is not None:
2410 msg
= video_id
+ ': ' + msg
2412 msg
+= ' (caused by %r)' % cause
2414 msg
+= bug_reports_message()
2415 super(ExtractorError
, self
).__init
__(msg
)
2418 self
.exc_info
= sys
.exc_info() # preserve original exception
2420 self
.video_id
= video_id
2422 def format_traceback(self
):
2423 if self
.traceback
is None:
2425 return ''.join(traceback
.format_tb(self
.traceback
))
2428 class UnsupportedError(ExtractorError
):
2429 def __init__(self
, url
):
2430 super(UnsupportedError
, self
).__init
__(
2431 'Unsupported URL: %s' % url
, expected
=True)
class RegexNotFoundError(ExtractorError):
    """Raised when a regular expression expected to match page data did not match."""
2440 class GeoRestrictedError(ExtractorError
):
2441 """Geographic restriction Error exception.
2443 This exception may be thrown when a video is not available from your
2444 geographic location due to geographic restrictions imposed by a website.
2447 def __init__(self
, msg
, countries
=None):
2448 super(GeoRestrictedError
, self
).__init
__(msg
, expected
=True)
2450 self
.countries
= countries
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Preserve the underlying sys.exc_info() tuple, if any, for callers
        # that want to report or re-raise the original error.
        self.exc_info = exc_info
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict.
    """
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
2485 class PostProcessingError(YoutubeDLError
):
2486 """Post Processing exception.
2488 This exception may be raised by PostProcessor's .run() method to
2489 indicate an error in the postprocessing task.
2492 def __init__(self
, msg
):
2493 super(PostProcessingError
, self
).__init
__(msg
)
class ExistingVideoReached(YoutubeDLError):
    # NOTE(review): the original docstring read '--max-downloads limit has
    # been reached', apparently copy-pasted from MaxDownloadsReached; per the
    # class name this exception signals that an already downloaded video was
    # encountered — confirm against the raising call site.
    """ An already downloaded video was reached (--break-on-existing). """
class RejectedVideoReached(YoutubeDLError):
    # NOTE(review): the original docstring read '--max-downloads limit has
    # been reached', apparently copy-pasted from MaxDownloadsReached; per the
    # class name this exception signals that a rejected video was reached
    # (--break-on-reject) — confirm against the raising call site.
    """ A rejected video was reached (--break-on-reject). """
class ThrottledDownload(YoutubeDLError):
    """ Download speed below --throttled-rate. """
    # Raised when the measured transfer rate drops under the user-supplied
    # --throttled-rate threshold.
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    # Raised once the number of completed downloads hits the --max-downloads
    # limit, to stop further processing.
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
2526 class ContentTooShortError(YoutubeDLError
):
2527 """Content Too Short exception.
2529 This exception may be raised by FileDownloader objects when a file they
2530 download is too small for what the server announced first, indicating
2531 the connection was probably interrupted.
2534 def __init__(self
, downloaded
, expected
):
2535 super(ContentTooShortError
, self
).__init
__(
2536 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded
, expected
)
2539 self
.downloaded
= downloaded
2540 self
.expected
= expected
2543 class XAttrMetadataError(YoutubeDLError
):
2544 def __init__(self
, code
=None, msg
='Unknown error'):
2545 super(XAttrMetadataError
, self
).__init
__(msg
)
2549 # Parsing code and msg
2550 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
2551 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
2552 self
.reason
= 'NO_SPACE'
2553 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
2554 self
.reason
= 'VALUE_TOO_LONG'
2556 self
.reason
= 'NOT_SUPPORTED'
2559 class XAttrUnavailableError(YoutubeDLError
):
2563 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
2564 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2565 # expected HTTP responses to meet HTTP/1.0 or later (see also
2566 # https://github.com/ytdl-org/youtube-dl/issues/6727)
2567 if sys
.version_info
< (3, 0):
2568 kwargs
['strict'] = True
2569 hc
= http_class(*args
, **compat_kwargs(kwargs
))
2570 source_address
= ydl_handler
._params
.get('source_address')
2572 if source_address
is not None:
2573 # This is to workaround _create_connection() from socket where it will try all
2574 # address data from getaddrinfo() including IPv6. This filters the result from
2575 # getaddrinfo() based on the source_address value.
2576 # This is based on the cpython socket.create_connection() function.
2577 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2578 def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
2579 host
, port
= address
2581 addrs
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
)
2582 af
= socket
.AF_INET
if '.' in source_address
[0] else socket
.AF_INET6
2583 ip_addrs
= [addr
for addr
in addrs
if addr
[0] == af
]
2584 if addrs
and not ip_addrs
:
2585 ip_version
= 'v4' if af
== socket
.AF_INET
else 'v6'
2587 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2588 % (ip_version
, source_address
[0]))
2589 for res
in ip_addrs
:
2590 af
, socktype
, proto
, canonname
, sa
= res
2593 sock
= socket
.socket(af
, socktype
, proto
)
2594 if timeout
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
:
2595 sock
.settimeout(timeout
)
2596 sock
.bind(source_address
)
2598 err
= None # Explicitly break reference cycle
2600 except socket
.error
as _
:
2602 if sock
is not None:
2607 raise socket
.error('getaddrinfo returns an empty list')
2608 if hasattr(hc
, '_create_connection'):
2609 hc
._create
_connection
= _create_connection
2610 sa
= (source_address
, 0)
2611 if hasattr(hc
, 'source_address'): # Python 2.7+
2612 hc
.source_address
= sa
2614 def _hc_connect(self
, *args
, **kwargs
):
2615 sock
= _create_connection(
2616 (self
.host
, self
.port
), self
.timeout
, sa
)
2618 self
.sock
= ssl
.wrap_socket(
2619 sock
, self
.key_file
, self
.cert_file
,
2620 ssl_version
=ssl
.PROTOCOL_TLSv1
)
2623 hc
.connect
= functools
.partial(_hc_connect
, hc
)
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, return a copy of the mapping with the marker
    removed along with any 'Accept-Encoding' header (matched
    case-insensitively), so the request is sent without compression
    negotiation.  Otherwise the mapping is returned unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    result = dict(
        (key, value) for key, value in headers.items()
        if key.lower() != 'accept-encoding')
    del result['Youtubedl-no-compression']
    return result
2638 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
2639 """Handler for HTTP requests and responses.
2641 This class, when installed with an OpenerDirector, automatically adds
2642 the standard headers to every HTTP request and handles gzipped and
2643 deflated responses from web servers. If compression is to be avoided in
2644 a particular request, the original request in the program code only has
2645 to include the HTTP header "Youtubedl-no-compression", which will be
2646 removed before making the real request.
2648 Part of this code was copied from:
2650 http://techknack.net/python-urllib2-handlers/
2652 Andrew Rowls, the author of that code, agreed to release it to the
2656 def __init__(self
, params
, *args
, **kwargs
):
2657 compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
)
2658 self
._params
= params
2660 def http_open(self
, req
):
2661 conn_class
= compat_http_client
.HTTPConnection
2663 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2665 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2666 del req
.headers
['Ytdl-socks-proxy']
2668 return self
.do_open(functools
.partial(
2669 _create_http_connection
, self
, conn_class
, False),
2677 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
2679 return zlib
.decompress(data
)
2681 def http_request(self
, req
):
2682 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2683 # always respected by websites, some tend to give out URLs with non percent-encoded
2684 # non-ASCII characters (see telemb.py, ard.py [#3412])
2685 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2686 # To work around aforementioned issue we will replace request's original URL with
2687 # percent-encoded one
2688 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2689 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2690 url
= req
.get_full_url()
2691 url_escaped
= escape_url(url
)
2693 # Substitute URL if any change after escaping
2694 if url
!= url_escaped
:
2695 req
= update_Request(req
, url
=url_escaped
)
2697 for h
, v
in std_headers
.items():
2698 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2699 # The dict keys are capitalized because of this bug by urllib
2700 if h
.capitalize() not in req
.headers
:
2701 req
.add_header(h
, v
)
2703 req
.headers
= handle_youtubedl_headers(req
.headers
)
2705 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
2706 # Python 2.6 is brain-dead when it comes to fragments
2707 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
2708 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
2712 def http_response(self
, req
, resp
):
2715 if resp
.headers
.get('Content-encoding', '') == 'gzip':
2716 content
= resp
.read()
2717 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
2719 uncompressed
= io
.BytesIO(gz
.read())
2720 except IOError as original_ioerror
:
2721 # There may be junk add the end of the file
2722 # See http://stackoverflow.com/q/4928560/35070 for details
2723 for i
in range(1, 1024):
2725 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
2726 uncompressed
= io
.BytesIO(gz
.read())
2731 raise original_ioerror
2732 resp
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2733 resp
.msg
= old_resp
.msg
2734 del resp
.headers
['Content-encoding']
2736 if resp
.headers
.get('Content-encoding', '') == 'deflate':
2737 gz
= io
.BytesIO(self
.deflate(resp
.read()))
2738 resp
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2739 resp
.msg
= old_resp
.msg
2740 del resp
.headers
['Content-encoding']
2741 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2742 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2743 if 300 <= resp
.code
< 400:
2744 location
= resp
.headers
.get('Location')
2746 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2747 if sys
.version_info
>= (3, 0):
2748 location
= location
.encode('iso-8859-1').decode('utf-8')
2750 location
= location
.decode('utf-8')
2751 location_escaped
= escape_url(location
)
2752 if location
!= location_escaped
:
2753 del resp
.headers
['Location']
2754 if sys
.version_info
< (3, 0):
2755 location_escaped
= location_escaped
.encode('utf-8')
2756 resp
.headers
['Location'] = location_escaped
2759 https_request
= http_request
2760 https_response
= http_response
2763 def make_socks_conn_class(base_class
, socks_proxy
):
2764 assert issubclass(base_class
, (
2765 compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
))
2767 url_components
= compat_urlparse
.urlparse(socks_proxy
)
2768 if url_components
.scheme
.lower() == 'socks5':
2769 socks_type
= ProxyType
.SOCKS5
2770 elif url_components
.scheme
.lower() in ('socks', 'socks4'):
2771 socks_type
= ProxyType
.SOCKS4
2772 elif url_components
.scheme
.lower() == 'socks4a':
2773 socks_type
= ProxyType
.SOCKS4A
2775 def unquote_if_non_empty(s
):
2778 return compat_urllib_parse_unquote_plus(s
)
2782 url_components
.hostname
, url_components
.port
or 1080,
2784 unquote_if_non_empty(url_components
.username
),
2785 unquote_if_non_empty(url_components
.password
),
2788 class SocksConnection(base_class
):
2790 self
.sock
= sockssocket()
2791 self
.sock
.setproxy(*proxy_args
)
2792 if type(self
.timeout
) in (int, float):
2793 self
.sock
.settimeout(self
.timeout
)
2794 self
.sock
.connect((self
.host
, self
.port
))
2796 if isinstance(self
, compat_http_client
.HTTPSConnection
):
2797 if hasattr(self
, '_context'): # Python > 2.6
2798 self
.sock
= self
._context
.wrap_socket(
2799 self
.sock
, server_hostname
=self
.host
)
2801 self
.sock
= ssl
.wrap_socket(self
.sock
)
2803 return SocksConnection
2806 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
):
2807 def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
):
2808 compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
)
2809 self
._https
_conn
_class
= https_conn_class
or compat_http_client
.HTTPSConnection
2810 self
._params
= params
2812 def https_open(self
, req
):
2814 conn_class
= self
._https
_conn
_class
2816 if hasattr(self
, '_context'): # python > 2.6
2817 kwargs
['context'] = self
._context
2818 if hasattr(self
, '_check_hostname'): # python 3.x
2819 kwargs
['check_hostname'] = self
._check
_hostname
2821 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2823 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2824 del req
.headers
['Ytdl-socks-proxy']
2826 return self
.do_open(functools
.partial(
2827 _create_http_connection
, self
, conn_class
, True),
2831 class YoutubeDLCookieJar(compat_cookiejar
.MozillaCookieJar
):
2833 See [1] for cookie file format.
2835 1. https://curl.haxx.se/docs/http-cookies.html
2837 _HTTPONLY_PREFIX
= '#HttpOnly_'
2839 _HEADER
= '''# Netscape HTTP Cookie File
2840 # This file is generated by yt-dlp. Do not edit.
2843 _CookieFileEntry
= collections
.namedtuple(
2845 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2847 def save(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2849 Save cookies to a file.
2851 Most of the code is taken from CPython 3.8 and slightly adapted
2852 to support cookie files with UTF-8 in both python 2 and 3.
2854 if filename
is None:
2855 if self
.filename
is not None:
2856 filename
= self
.filename
2858 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2860 # Store session cookies with `expires` set to 0 instead of an empty
2863 if cookie
.expires
is None:
2866 with io
.open(filename
, 'w', encoding
='utf-8') as f
:
2867 f
.write(self
._HEADER
)
2870 if not ignore_discard
and cookie
.discard
:
2872 if not ignore_expires
and cookie
.is_expired(now
):
2878 if cookie
.domain
.startswith('.'):
2879 initial_dot
= 'TRUE'
2881 initial_dot
= 'FALSE'
2882 if cookie
.expires
is not None:
2883 expires
= compat_str(cookie
.expires
)
2886 if cookie
.value
is None:
2887 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2888 # with no name, whereas http.cookiejar regards it as a
2889 # cookie with no value.
2894 value
= cookie
.value
2896 '\t'.join([cookie
.domain
, initial_dot
, cookie
.path
,
2897 secure
, expires
, name
, value
]) + '\n')
2899 def load(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2900 """Load cookies from a file."""
2901 if filename
is None:
2902 if self
.filename
is not None:
2903 filename
= self
.filename
2905 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2907 def prepare_line(line
):
2908 if line
.startswith(self
._HTTPONLY
_PREFIX
):
2909 line
= line
[len(self
._HTTPONLY
_PREFIX
):]
2910 # comments and empty lines are fine
2911 if line
.startswith('#') or not line
.strip():
2913 cookie_list
= line
.split('\t')
2914 if len(cookie_list
) != self
._ENTRY
_LEN
:
2915 raise compat_cookiejar
.LoadError('invalid length %d' % len(cookie_list
))
2916 cookie
= self
._CookieFileEntry
(*cookie_list
)
2917 if cookie
.expires_at
and not cookie
.expires_at
.isdigit():
2918 raise compat_cookiejar
.LoadError('invalid expires at %s' % cookie
.expires_at
)
2922 with io
.open(filename
, encoding
='utf-8') as f
:
2925 cf
.write(prepare_line(line
))
2926 except compat_cookiejar
.LoadError
as e
:
2928 'WARNING: skipping cookie file entry due to %s: %r\n'
2929 % (e
, line
), sys
.stderr
)
2932 self
._really
_load
(cf
, filename
, ignore_discard
, ignore_expires
)
2933 # Session cookies are denoted by either `expires` field set to
2934 # an empty string or 0. MozillaCookieJar only recognizes the former
2935 # (see [1]). So we need force the latter to be recognized as session
2936 # cookies on our own.
2937 # Session cookies may be important for cookies-based authentication,
2938 # e.g. usually, when user does not check 'Remember me' check box while
2939 # logging in on a site, some important cookies are stored as session
2940 # cookies so that not recognizing them will result in failed login.
2941 # 1. https://bugs.python.org/issue17164
2943 # Treat `expires=0` cookies as session cookies
2944 if cookie
.expires
== 0:
2945 cookie
.expires
= None
2946 cookie
.discard
= True
2949 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
):
2950 def __init__(self
, cookiejar
=None):
2951 compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
)
2953 def http_response(self
, request
, response
):
2954 # Python 2 will choke on next HTTP request in row if there are non-ASCII
2955 # characters in Set-Cookie HTTP header of last response (see
2956 # https://github.com/ytdl-org/youtube-dl/issues/6769).
2957 # In order to at least prevent crashing we will percent encode Set-Cookie
2958 # header before HTTPCookieProcessor starts processing it.
2959 # if sys.version_info < (3, 0) and response.headers:
2960 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2961 # set_cookie = response.headers.get(set_cookie_header)
2963 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2964 # if set_cookie != set_cookie_escaped:
2965 # del response.headers[set_cookie_header]
2966 # response.headers[set_cookie_header] = set_cookie_escaped
2967 return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
)
2969 https_request
= compat_urllib_request
.HTTPCookieProcessor
.http_request
2970 https_response
= http_response
2973 class YoutubeDLRedirectHandler(compat_urllib_request
.HTTPRedirectHandler
):
2974 """YoutubeDL redirect handler
2976 The code is based on HTTPRedirectHandler implementation from CPython [1].
2978 This redirect handler solves two issues:
2979 - ensures redirect URL is always unicode under python 2
2980 - introduces support for experimental HTTP response status code
2981 308 Permanent Redirect [2] used by some sites [3]
2983 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2984 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2985 3. https://github.com/ytdl-org/youtube-dl/issues/28768
2988 http_error_301
= http_error_303
= http_error_307
= http_error_308
= compat_urllib_request
.HTTPRedirectHandler
.http_error_302
2990 def redirect_request(self
, req
, fp
, code
, msg
, headers
, newurl
):
2991 """Return a Request or None in response to a redirect.
2993 This is called by the http_error_30x methods when a
2994 redirection response is received. If a redirection should
2995 take place, return a new Request to allow http_error_30x to
2996 perform the redirect. Otherwise, raise HTTPError if no-one
2997 else should try to handle this url. Return None if you can't
2998 but another Handler might.
3000 m
= req
.get_method()
3001 if (not (code
in (301, 302, 303, 307, 308) and m
in ("GET", "HEAD")
3002 or code
in (301, 302, 303) and m
== "POST")):
3003 raise compat_HTTPError(req
.full_url
, code
, msg
, headers
, fp
)
3004 # Strictly (according to RFC 2616), 301 or 302 in response to
3005 # a POST MUST NOT cause a redirection without confirmation
3006 # from the user (of urllib.request, in this case). In practice,
3007 # essentially all clients do redirect in this case, so we do
3010 # On python 2 urlh.geturl() may sometimes return redirect URL
3011 # as byte string instead of unicode. This workaround allows
3012 # to force it always return unicode.
3013 if sys
.version_info
[0] < 3:
3014 newurl
= compat_str(newurl
)
3016 # Be conciliant with URIs containing a space. This is mainly
3017 # redundant with the more complete encoding done in http_error_302(),
3018 # but it is kept for compatibility with other callers.
3019 newurl
= newurl
.replace(' ', '%20')
3021 CONTENT_HEADERS
= ("content-length", "content-type")
3022 # NB: don't use dict comprehension for python 2.6 compatibility
3023 newheaders
= dict((k
, v
) for k
, v
in req
.headers
.items()
3024 if k
.lower() not in CONTENT_HEADERS
)
3025 return compat_urllib_request
.Request(
3026 newurl
, headers
=newheaders
, origin_req_host
=req
.origin_req_host
,
3030 def extract_timezone(date_str
):
3032 r
'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
3035 timezone
= datetime
.timedelta()
3037 date_str
= date_str
[:-len(m
.group('tz'))]
3038 if not m
.group('sign'):
3039 timezone
= datetime
.timedelta()
3041 sign
= 1 if m
.group('sign') == '+' else -1
3042 timezone
= datetime
.timedelta(
3043 hours
=sign
* int(m
.group('hours')),
3044 minutes
=sign
* int(m
.group('minutes')))
3045 return timezone
, date_str
3048 def parse_iso8601(date_str
, delimiter
='T', timezone
=None):
3049 """ Return a UNIX timestamp from the given date """
3051 if date_str
is None:
3054 date_str
= re
.sub(r
'\.[0-9]+', '', date_str
)
3056 if timezone
is None:
3057 timezone
, date_str
= extract_timezone(date_str
)
3060 date_format
= '%Y-%m-%d{0}%H:%M:%S'.format(delimiter
)
3061 dt
= datetime
.datetime
.strptime(date_str
, date_format
) - timezone
3062 return calendar
.timegm(dt
.timetuple())
def date_formats(day_first=True):
    """Return the list of candidate date format strings.

    day_first selects between the day-first (e.g. European) and the
    month-first (e.g. US) format tables.
    """
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3071 def unified_strdate(date_str
, day_first
=True):
3072 """Return a string with the date in the format YYYYMMDD"""
3074 if date_str
is None:
3078 date_str
= date_str
.replace(',', ' ')
3079 # Remove AM/PM + timezone
3080 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
3081 _
, date_str
= extract_timezone(date_str
)
3083 for expression
in date_formats(day_first
):
3085 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
3088 if upload_date
is None:
3089 timetuple
= email
.utils
.parsedate_tz(date_str
)
3092 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
3095 if upload_date
is not None:
3096 return compat_str(upload_date
)
3099 def unified_timestamp(date_str
, day_first
=True):
3100 if date_str
is None:
3103 date_str
= re
.sub(r
'[,|]', '', date_str
)
3105 pm_delta
= 12 if re
.search(r
'(?i)PM', date_str
) else 0
3106 timezone
, date_str
= extract_timezone(date_str
)
3108 # Remove AM/PM + timezone
3109 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
3111 # Remove unrecognized timezones from ISO 8601 alike timestamps
3112 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
3114 date_str
= date_str
[:-len(m
.group('tz'))]
3116 # Python only supports microseconds, so remove nanoseconds
3117 m
= re
.search(r
'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str
)
3119 date_str
= m
.group(1)
3121 for expression
in date_formats(day_first
):
3123 dt
= datetime
.datetime
.strptime(date_str
, expression
) - timezone
+ datetime
.timedelta(hours
=pm_delta
)
3124 return calendar
.timegm(dt
.timetuple())
3127 timetuple
= email
.utils
.parsedate_tz(date_str
)
3129 return calendar
.timegm(timetuple
) + pm_delta
* 3600
3132 def determine_ext(url
, default_ext
='unknown_video'):
3133 if url
is None or '.' not in url
:
3135 guess
= url
.partition('?')[0].rpartition('.')[2]
3136 if re
.match(r
'^[A-Za-z0-9]+$', guess
):
3138 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3139 elif guess
.rstrip('/') in KNOWN_EXTENSIONS
:
3140 return guess
.rstrip('/')
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by replacing the video extension with
    '<sub_lang>.<sub_format>' (e.g. 'clip.mp4' -> 'clip.en.vtt')."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
3149 def datetime_from_str(date_str
, precision
='auto', format
='%Y%m%d'):
3151 Return a datetime object from a string in the format YYYYMMDD or
3152 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3154 format: string date format used to return datetime object from
3155 precision: round the time portion of a datetime object.
3156 auto|microsecond|second|minute|hour|day.
3157 auto: round to the unit provided in date_str (if applicable).
3159 auto_precision
= False
3160 if precision
== 'auto':
3161 auto_precision
= True
3162 precision
= 'microsecond'
3163 today
= datetime_round(datetime
.datetime
.now(), precision
)
3164 if date_str
in ('now', 'today'):
3166 if date_str
== 'yesterday':
3167 return today
- datetime
.timedelta(days
=1)
3169 r
'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3171 if match
is not None:
3172 start_time
= datetime_from_str(match
.group('start'), precision
, format
)
3173 time
= int(match
.group('time')) * (-1 if match
.group('sign') == '-' else 1)
3174 unit
= match
.group('unit')
3175 if unit
== 'month' or unit
== 'year':
3176 new_date
= datetime_add_months(start_time
, time
* 12 if unit
== 'year' else time
)
3182 delta
= datetime
.timedelta(**{unit + 's': time}
)
3183 new_date
= start_time
+ delta
3185 return datetime_round(new_date
, unit
)
3188 return datetime_round(datetime
.datetime
.strptime(date_str
, format
), precision
)
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    # Delegate to datetime_from_str at full precision, then drop the time part.
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day is clamped to the last valid day of the target month
    (e.g. Jan 31 + 1 month -> Feb 28/29).
    """
    # Work with a zero-based month so year carry/borrow is plain floor division.
    zero_based_month = dt.month + months - 1
    new_year = dt.year + zero_based_month // 12
    new_month = zero_based_month % 12 + 1
    last_day_of_month = calendar.monthrange(new_year, new_month)[1]
    new_day = min(dt.day, last_day_of_month)
    return dt.replace(new_year, new_month, new_day)
3210 def datetime_round(dt
, precision
='day'):
3212 Round a datetime object's time to a specific precision
3214 if precision
== 'microsecond':
3223 roundto
= lambda x
, n
: ((x
+ n
/ 2) // n
) * n
3224 timestamp
= calendar
.timegm(dt
.timetuple())
3225 return datetime
.datetime
.utcfromtimestamp(roundto(timestamp
, unit_seconds
[precision
]))
3228 def hyphenate_date(date_str
):
3230 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3231 match
= re
.match(r
'^(\d\d\d\d)(\d\d)(\d\d)$', date_str
)
3232 if match
is not None:
3233 return '-'.join(match
.groups())
3238 class DateRange(object):
3239 """Represents a time interval between two dates"""
3241 def __init__(self
, start
=None, end
=None):
3242 """start and end must be strings in the format accepted by date"""
3243 if start
is not None:
3244 self
.start
= date_from_str(start
)
3246 self
.start
= datetime
.datetime
.min.date()
3248 self
.end
= date_from_str(end
)
3250 self
.end
= datetime
.datetime
.max.date()
3251 if self
.start
> self
.end
:
3252 raise ValueError('Date range: "%s" , the start date must be before the end date' % self
)
3256 """Returns a range that only contains the given day"""
3257 return cls(day
, day
)
3259 def __contains__(self
, date
):
3260 """Check if the date is in the range"""
3261 if not isinstance(date
, datetime
.date
):
3262 date
= date_from_str(date
)
3263 return self
.start
<= date
<= self
.end
3266 return '%s - %s' % (self
.start
.isoformat(), self
.end
.isoformat())
3269 def platform_name():
3270 """ Returns the platform name as a compat_str """
3271 res
= platform
.platform()
3272 if isinstance(res
, bytes):
3273 res
= res
.decode(preferredencoding())
3275 assert isinstance(res
, compat_str
)
3279 def _windows_write_string(s
, out
):
3280 """ Returns True if the string was written using special methods,
3281 False if it has yet to be written out."""
3282 # Adapted from http://stackoverflow.com/a/3259271/35070
3285 import ctypes
.wintypes
3293 fileno
= out
.fileno()
3294 except AttributeError:
3295 # If the output stream doesn't have a fileno, it's virtual
3297 except io
.UnsupportedOperation
:
3298 # Some strange Windows pseudo files?
3300 if fileno
not in WIN_OUTPUT_IDS
:
3303 GetStdHandle
= compat_ctypes_WINFUNCTYPE(
3304 ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)(
3305 ('GetStdHandle', ctypes
.windll
.kernel32
))
3306 h
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
])
3308 WriteConsoleW
= compat_ctypes_WINFUNCTYPE(
3309 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
,
3310 ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
),
3311 ctypes
.wintypes
.LPVOID
)(('WriteConsoleW', ctypes
.windll
.kernel32
))
3312 written
= ctypes
.wintypes
.DWORD(0)
3314 GetFileType
= compat_ctypes_WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(('GetFileType', ctypes
.windll
.kernel32
))
3315 FILE_TYPE_CHAR
= 0x0002
3316 FILE_TYPE_REMOTE
= 0x8000
3317 GetConsoleMode
= compat_ctypes_WINFUNCTYPE(
3318 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
,
3319 ctypes
.POINTER(ctypes
.wintypes
.DWORD
))(
3320 ('GetConsoleMode', ctypes
.windll
.kernel32
))
3321 INVALID_HANDLE_VALUE
= ctypes
.wintypes
.DWORD(-1).value
3323 def not_a_console(handle
):
3324 if handle
== INVALID_HANDLE_VALUE
or handle
is None:
3326 return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
3327 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0)
3329 if not_a_console(h
):
3332 def next_nonbmp_pos(s
):
3334 return next(i
for i
, c
in enumerate(s
) if ord(c
) > 0xffff)
3335 except StopIteration:
3339 count
= min(next_nonbmp_pos(s
), 1024)
3341 ret
= WriteConsoleW(
3342 h
, s
, count
if count
else 2, ctypes
.byref(written
), None)
3344 raise OSError('Failed to write string')
3345 if not count
: # We just wrote a non-BMP character
3346 assert written
.value
== 2
3349 assert written
.value
> 0
3350 s
= s
[written
.value
:]
3354 def write_string(s
, out
=None, encoding
=None):
3357 assert type(s
) == compat_str
3359 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
3360 if _windows_write_string(s
, out
):
3363 if ('b' in getattr(out
, 'mode', '')
3364 or sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
3365 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
3367 elif hasattr(out
, 'buffer'):
3368 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
3369 byt
= s
.encode(enc
, 'ignore')
3370 out
.buffer.write(byt
)
3376 def bytes_to_intlist(bs
):
3379 if isinstance(bs
[0], int): # Python 3
3382 return [ord(c
) for c
in bs
]
3385 def intlist_to_bytes(xs
):
3388 return compat_struct_pack('%dB' % len(xs
), *xs
)
3391 # Cross-platform file locking
3392 if sys
.platform
== 'win32':
3393 import ctypes
.wintypes
3396 class OVERLAPPED(ctypes
.Structure
):
3398 ('Internal', ctypes
.wintypes
.LPVOID
),
3399 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
3400 ('Offset', ctypes
.wintypes
.DWORD
),
3401 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
3402 ('hEvent', ctypes
.wintypes
.HANDLE
),
3405 kernel32
= ctypes
.windll
.kernel32
3406 LockFileEx
= kernel32
.LockFileEx
3407 LockFileEx
.argtypes
= [
3408 ctypes
.wintypes
.HANDLE
, # hFile
3409 ctypes
.wintypes
.DWORD
, # dwFlags
3410 ctypes
.wintypes
.DWORD
, # dwReserved
3411 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3412 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3413 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3415 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
3416 UnlockFileEx
= kernel32
.UnlockFileEx
3417 UnlockFileEx
.argtypes
= [
3418 ctypes
.wintypes
.HANDLE
, # hFile
3419 ctypes
.wintypes
.DWORD
, # dwReserved
3420 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3421 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3422 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3424 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
3425 whole_low
= 0xffffffff
3426 whole_high
= 0x7fffffff
3428 def _lock_file(f
, exclusive
):
3429 overlapped
= OVERLAPPED()
3430 overlapped
.Offset
= 0
3431 overlapped
.OffsetHigh
= 0
3432 overlapped
.hEvent
= 0
3433 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
3434 handle
= msvcrt
.get_osfhandle(f
.fileno())
3435 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
3436 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3437 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
3439 def _unlock_file(f
):
3440 assert f
._lock
_file
_overlapped
_p
3441 handle
= msvcrt
.get_osfhandle(f
.fileno())
3442 if not UnlockFileEx(handle
, 0,
3443 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3444 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
3447 # Some platforms, such as Jython, is missing fcntl
3451 def _lock_file(f
, exclusive
):
3452 fcntl
.flock(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
3454 def _unlock_file(f
):
3455 fcntl
.flock(f
, fcntl
.LOCK_UN
)
3457 UNSUPPORTED_MSG
= 'file locking is not supported on this platform'
3459 def _lock_file(f
, exclusive
):
3460 raise IOError(UNSUPPORTED_MSG
)
3462 def _unlock_file(f
):
3463 raise IOError(UNSUPPORTED_MSG
)
3466 class locked_file(object):
3467 def __init__(self
, filename
, mode
, encoding
=None):
3468 assert mode
in ['r', 'a', 'w']
3469 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
3472 def __enter__(self
):
3473 exclusive
= self
.mode
!= 'r'
3475 _lock_file(self
.f
, exclusive
)
3481 def __exit__(self
, etype
, value
, traceback
):
3483 _unlock_file(self
.f
)
3490 def write(self
, *args
):
3491 return self
.f
.write(*args
)
3493 def read(self
, *args
):
3494 return self
.f
.read(*args
)
def get_filesystem_encoding():
    """Return the filesystem encoding name, falling back to 'utf-8'.

    sys.getfilesystemencoding() may return None on some old Python 2
    setups; never propagate that to callers.
    """
    encoding = sys.getfilesystemencoding()
    if encoding is not None:
        return encoding
    return 'utf-8'
3502 def shell_quote(args
):
3504 encoding
= get_filesystem_encoding()
3506 if isinstance(a
, bytes):
3507 # We may get a filename encoded with 'encodeFilename'
3508 a
= a
.decode(encoding
)
3509 quoted_args
.append(compat_shlex_quote(a
))
3510 return ' '.join(quoted_args
)
3513 def smuggle_url(url
, data
):
3514 """ Pass additional data in a URL for internal use. """
3516 url
, idata
= unsmuggle_url(url
, {})
3518 sdata
= compat_urllib_parse_urlencode(
3519 {'__youtubedl_smuggle': json.dumps(data)}
)
3520 return url
+ '#' + sdata
3523 def unsmuggle_url(smug_url
, default
=None):
3524 if '#__youtubedl_smuggle' not in smug_url
:
3525 return smug_url
, default
3526 url
, _
, sdata
= smug_url
.rpartition('#')
3527 jsond
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0]
3528 data
= json
.loads(jsond
)
3532 def format_bytes(bytes):
3535 if type(bytes) is str:
3536 bytes = float(bytes)
3540 exponent
= int(math
.log(bytes, 1024.0))
3541 suffix
= ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent
]
3542 converted
= float(bytes) / float(1024 ** exponent
)
3543 return '%.2f%s' % (converted
, suffix
)
3546 def lookup_unit_table(unit_table
, s
):
3547 units_re
= '|'.join(re
.escape(u
) for u
in unit_table
)
3549 r
'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re
, s
)
3552 num_str
= m
.group('num').replace(',', '.')
3553 mult
= unit_table
[m
.group('unit')]
3554 return int(float(num_str
) * mult
)
3557 def parse_filesize(s
):
3561 # The lower-case forms are of course incorrect and unofficial,
3562 # but we support those too
3579 'megabytes': 1000 ** 2,
3580 'mebibytes': 1024 ** 2,
3586 'gigabytes': 1000 ** 3,
3587 'gibibytes': 1024 ** 3,
3593 'terabytes': 1000 ** 4,
3594 'tebibytes': 1024 ** 4,
3600 'petabytes': 1000 ** 5,
3601 'pebibytes': 1024 ** 5,
3607 'exabytes': 1000 ** 6,
3608 'exbibytes': 1024 ** 6,
3614 'zettabytes': 1000 ** 7,
3615 'zebibytes': 1024 ** 7,
3621 'yottabytes': 1000 ** 8,
3622 'yobibytes': 1024 ** 8,
3625 return lookup_unit_table(_UNIT_TABLE
, s
)
3634 if re
.match(r
'^[\d,.]+$', s
):
3635 return str_to_int(s
)
3646 return lookup_unit_table(_UNIT_TABLE
, s
)
3649 def parse_resolution(s
):
3653 mobj
= re
.search(r
'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s
)
3656 'width': int(mobj
.group('w')),
3657 'height': int(mobj
.group('h')),
3660 mobj
= re
.search(r
'\b(\d+)[pPiI]\b', s
)
3662 return {'height': int(mobj.group(1))}
3664 mobj
= re
.search(r
'\b([48])[kK]\b', s
)
3666 return {'height': int(mobj.group(1)) * 540}
3671 def parse_bitrate(s
):
3672 if not isinstance(s
, compat_str
):
3674 mobj
= re
.search(r
'\b(\d+)\s*kbps', s
)
3676 return int(mobj
.group(1))
3679 def month_by_name(name
, lang
='en'):
3680 """ Return the number of a month by (locale-independently) English name """
3682 month_names
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en'])
3685 return month_names
.index(name
) + 1
3690 def month_by_abbreviation(abbrev
):
3691 """ Return the number of a month by (locale-independently) English
3695 return [s
[:3] for s
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1
3700 def fix_xml_ampersands(xml_str
):
3701 """Replace all the '&' by '&' in XML"""
3703 r
'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3708 def setproctitle(title
):
3709 assert isinstance(title
, compat_str
)
3711 # ctypes in Jython is not complete
3712 # http://bugs.jython.org/issue2148
3713 if sys
.platform
.startswith('java'):
3717 libc
= ctypes
.cdll
.LoadLibrary('libc.so.6')
3721 # LoadLibrary in Windows Python 2.7.13 only expects
3722 # a bytestring, but since unicode_literals turns
3723 # every string into a unicode string, it fails.
3725 title_bytes
= title
.encode('utf-8')
3726 buf
= ctypes
.create_string_buffer(len(title_bytes
))
3727 buf
.value
= title_bytes
3729 libc
.prctl(15, buf
, 0, 0, 0)
3730 except AttributeError:
3731 return # Strange libc, just skip this
def remove_start(s, start):
    """Return s with the prefix `start` removed, if present.

    None-safe: returns s unchanged when s is None or does not start
    with `start`.
    """
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Return s with the suffix `end` removed, if present.

    None-safe: returns s unchanged when s is None or does not end with
    `end`.
    """
    # Guard on a truthy `end`: the original `s[:-len(end)]` slice with an
    # empty suffix evaluates to s[:0] and wrongly returned '' for any s.
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
3742 def remove_quotes(s
):
3743 if s
is None or len(s
) < 2:
3745 for quote
in ('"', "'", ):
3746 if s
[0] == quote
and s
[-1] == quote
:
def get_domain(url):
    """Extract the bare domain from a URL, or None when no domain-looking
    part is found.

    Strips an optional http(s) scheme and a leading 'www.'.
    """
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if m:
        return m.group('domain')
    return None
def url_basename(url):
    """Return the last path component of a URL
    (e.g. 'c.mp4' for 'http://a/b/c.mp4?x=1')."""
    parsed_path = compat_urlparse.urlparse(url).path
    components = parsed_path.strip('/').split('/')
    return components[-1]
3762 return re
.match(r
'https?://[^?#&]+/', url
).group()
3765 def urljoin(base
, path
):
3766 if isinstance(path
, bytes):
3767 path
= path
.decode('utf-8')
3768 if not isinstance(path
, compat_str
) or not path
:
3770 if re
.match(r
'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path
):
3772 if isinstance(base
, bytes):
3773 base
= base
.decode('utf-8')
3774 if not isinstance(base
, compat_str
) or not re
.match(
3775 r
'^(?:https?:)?//', base
):
3777 return compat_urlparse
.urljoin(base
, path
)
3780 class HEADRequest(compat_urllib_request
.Request
):
3781 def get_method(self
):
3785 class PUTRequest(compat_urllib_request
.Request
):
3786 def get_method(self
):
3790 def int_or_none(v
, scale
=1, default
=None, get_attr
=None, invscale
=1):
3793 v
= getattr(v
, get_attr
, None)
3799 return int(v
) * invscale
// scale
3800 except (ValueError, TypeError):
def str_or_none(v, default=None):
    """Coerce v to compat_str, returning `default` when v is None."""
    if v is None:
        return default
    return compat_str(v)
3808 def str_to_int(int_str
):
3809 """ A more relaxed version of int_or_none """
3810 if isinstance(int_str
, compat_integer_types
):
3812 elif isinstance(int_str
, compat_str
):
3813 int_str
= re
.sub(r
'[,\.\+]', '', int_str
)
3814 return int_or_none(int_str
)
3817 def float_or_none(v
, scale
=1, invscale
=1, default
=None):
3821 return float(v
) * invscale
/ scale
3822 except (ValueError, TypeError):
def bool_or_none(v, default=None):
    """Return v only when it is a genuine bool; otherwise `default`.

    Note: non-bool truthy values (e.g. 1, 'yes') are NOT coerced.
    """
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return v.strip() when v is a string; otherwise `default`."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3834 def url_or_none(url
):
3835 if not url
or not isinstance(url
, compat_str
):
3838 return url
if re
.match(r
'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url
) else None
3841 def strftime_or_none(timestamp
, date_format
, default
=None):
3842 datetime_object
= None
3844 if isinstance(timestamp
, compat_numeric_types
): # unix timestamp
3845 datetime_object
= datetime
.datetime
.utcfromtimestamp(timestamp
)
3846 elif isinstance(timestamp
, compat_str
): # assume YYYYMMDD
3847 datetime_object
= datetime
.datetime
.strptime(timestamp
, '%Y%m%d')
3848 return datetime_object
.strftime(date_format
)
3849 except (ValueError, TypeError, AttributeError):
3853 def parse_duration(s
):
3854 if not isinstance(s
, compat_basestring
):
3859 days
, hours
, mins
, secs
, ms
= [None] * 5
3860 m
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
)
3862 days
, hours
, mins
, secs
, ms
= m
.groups()
3867 [0-9]+\s*y(?:ears?)?\s*
3870 [0-9]+\s*m(?:onths?)?\s*
3873 [0-9]+\s*w(?:eeks?)?\s*
3876 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3880 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3883 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3886 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3889 days
, hours
, mins
, secs
, ms
= m
.groups()
3891 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
3893 hours
, mins
= m
.groups()
3899 duration
+= float(secs
)
3901 duration
+= float(mins
) * 60
3903 duration
+= float(hours
) * 60 * 60
3905 duration
+= float(days
) * 24 * 60 * 60
3907 duration
+= float(ms
)
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` in front of filename's real extension
    ('clip.mp4' + 'temp' -> 'clip.temp.mp4').

    When expected_real_ext is given and does not match the actual
    extension, `ext` is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace filename's extension with `ext`
    ('clip.mp4' + 'srt' -> 'clip.srt').

    When expected_real_ext is given and does not match the actual
    extension, `ext` is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3926 def check_executable(exe
, args
=[]):
3927 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3928 args can be a list of arguments for a short output (like -version) """
3930 process_communicate_or_kill(subprocess
.Popen(
3931 [exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
))
3937 def get_exe_version(exe
, args
=['--version'],
3938 version_re
=None, unrecognized
='present'):
3939 """ Returns the version of the specified executable,
3940 or False if the executable is not present """
3942 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3943 # SIGTTOU if yt-dlp is run in the background.
3944 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3945 out
, _
= process_communicate_or_kill(subprocess
.Popen(
3946 [encodeArgument(exe
)] + args
,
3947 stdin
=subprocess
.PIPE
,
3948 stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
))
3951 if isinstance(out
, bytes): # Python 2.x
3952 out
= out
.decode('ascii', 'ignore')
3953 return detect_exe_version(out
, version_re
, unrecognized
)
3956 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
3957 assert isinstance(output
, compat_str
)
3958 if version_re
is None:
3959 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
3960 m
= re
.search(version_re
, output
)
3967 class LazyList(collections
.abc
.Sequence
):
3968 ''' Lazy immutable list from an iterable
3969 Note that slices of a LazyList are lists and not LazyList'''
3971 def __init__(self
, iterable
):
3972 self
.__iterable
= iter(iterable
)
3974 self
.__reversed
= False
3978 # We need to consume the entire iterable to iterate in reverse
3979 yield from self
.exhaust()
3981 yield from self
.__cache
3982 for item
in self
.__iterable
:
3983 self
.__cache
.append(item
)
3986 def __exhaust(self
):
3987 self
.__cache
.extend(self
.__iterable
)
3991 ''' Evaluate the entire iterable '''
3992 return self
.__exhaust
()[::-1 if self
.__reversed
else 1]
3995 def __reverse_index(x
):
3996 return None if x
is None else -(x
+ 1)
3998 def __getitem__(self
, idx
):
3999 if isinstance(idx
, slice):
4001 idx
= slice(self
.__reverse
_index
(idx
.start
), self
.__reverse
_index
(idx
.stop
), -(idx
.step
or 1))
4002 start
, stop
, step
= idx
.start
, idx
.stop
, idx
.step
or 1
4003 elif isinstance(idx
, int):
4005 idx
= self
.__reverse
_index
(idx
)
4006 start
, stop
, step
= idx
, idx
, 0
4008 raise TypeError('indices must be integers or slices')
4009 if ((start
or 0) < 0 or (stop
or 0) < 0
4010 or (start
is None and step
< 0)
4011 or (stop
is None and step
> 0)):
4012 # We need to consume the entire iterable to be able to slice from the end
4013 # Obviously, never use this with infinite iterables
4014 return self
.__exhaust
()[idx
]
4016 n
= max(start
or 0, stop
or 0) - len(self
.__cache
) + 1
4018 self
.__cache
.extend(itertools
.islice(self
.__iterable
, n
))
4019 return self
.__cache
[idx
]
4023 self
[-1] if self
.__reversed
else self
[0]
4030 return len(self
.__cache
)
4033 self
.__reversed
= not self
.__reversed
4037 # repr and str should mimic a list. So we exhaust the iterable
4038 return repr(self
.exhaust())
4041 return repr(self
.exhaust())
4044 class PagedList(object):
4046 # This is only useful for tests
4047 return len(self
.getslice())
4049 def getslice(self
, start
, end
):
4050 raise NotImplementedError('This method must be implemented by subclasses')
4052 def __getitem__(self
, idx
):
4053 if not isinstance(idx
, int) or idx
< 0:
4054 raise TypeError('indices must be non-negative integers')
4055 entries
= self
.getslice(idx
, idx
+ 1)
4056 return entries
[0] if entries
else None
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via pagefunc(pagenum)."""

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}

    def getslice(self, start=0, end=None):
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
class InAdvancePagedList(PagedList):
    """PagedList where the total number of pages is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first fetched page needs its head trimmed
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
def uppercase_escape(s):
    """Decode \\UXXXXXXXX escape sequences embedded in s; other text is untouched."""
    decode_unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode_unicode_escape(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Decode \\uXXXX escape sequences embedded in s; other text is untouched."""
    decode_unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode_unicode_escape(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        # Python 2's quote() cannot handle unicode; hand it UTF-8 bytes
        s = s.encode('utf-8')
    # Keep RFC 3986 reserved/unreserved punctuation intact
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        # Hostnames use IDNA encoding rather than percent-escaping
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
def read_batch_urls(batch_fd):
    """Read a batch file object and return the list of URLs it contains.

    Blank lines and lines starting with '#', ';' or ']' are skipped; a
    leading BOM and any ' #comment' suffix are stripped. The file object
    is closed on return.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
def update_url_query(url, query):
    """Return url with the key/value pairs of `query` merged into its query string."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))
def update_Request(req, url=None, data=None, headers=None, query=None):
    """Rebuild a urllib Request with updated url, data, headers and/or query.

    The HTTP verb of the original request (HEAD/PUT/other) is preserved by
    choosing the matching Request subclass. `headers`/`query` previously
    used mutable default arguments ({}); `None` sentinels avoid the
    shared-mutable-default pitfall while staying call-compatible.
    """
    req_headers = req.headers.copy()
    req_headers.update(headers or {})
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query or {})
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        # Propagate the socket timeout some callers attach to the request
        new_req.timeout = req.timeout
    return new_req
def _multipart_encode_impl(data, boundary):
    """Encode the dict `data` as multipart/form-data with the given boundary.

    Returns (payload_bytes, content_type). Raises ValueError if the boundary
    occurs inside a field, since that would corrupt the framing.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    # Accumulate parts in a list and join once — repeated bytes += is O(n^2)
    boundary_bytes = boundary.encode('ascii')
    parts = []
    for k, v in data.items():
        parts.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.append(content)

    parts.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(parts), content_type
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            out, content_type = _multipart_encode_impl(data, boundary)
            break
        except ValueError:
            if has_specified_boundary:
                raise
            # A random boundary collided with the payload — retry with a new one
            boundary = None

    return out, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key (or the first usable of several keys) in d.

    None values are always skipped; falsy values are skipped too unless
    skip_false_values is False.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key in d:
            val = d[key]
            if val is not None and (val or not skip_false_values):
                return val
    return default
def try_get(src, getter, expected_type=None):
    """Apply each getter to src and return the first result that doesn't raise.

    AttributeError/KeyError/TypeError/IndexError from a getter are swallowed;
    when expected_type is given, results of other types are skipped as well.
    """
    for get in variadic(getter):
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            pass
        else:
            if expected_type is None or isinstance(value, expected_type):
                return value
def merge_dicts(*dicts):
    """Merge dicts left-to-right with earlier dicts winning.

    None values are ignored entirely; an earlier *empty* string may be
    overwritten by a later non-empty string.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if (k not in merged
                    or (isinstance(v, compat_str) and v
                        and isinstance(merged[k], compat_str)
                        and not merged[k])):
                merged[k] = v
    return merged
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Coerce `string` to text (compat_str), decoding bytes with `encoding`.

    NOTE(review): the default `encoding` is evaluated once at import time;
    presumably preferredencoding() is stable for the process lifetime.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4328 TV_PARENTAL_GUIDELINES
= {
def parse_age_limit(s):
    """Parse an age limit ('18', '18+', US/TV ratings, or an int) to an int or None."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, leaving only the JSON payload."""
    jsonp_wrapper = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_wrapper, r'\g<callback_data>', code)
def js_to_json(code, vars={}):
    """Convert a JavaScript object literal into valid JSON text.

    vars is a dict of var, val pairs to substitute for bare identifiers.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, JS negation shorthand and trailing commas vanish
            return ""

        if v[0] in ("'", '"'):
            # Re-escape the string body for JSON
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            # Hex/octal integers become decimal (quoted when used as keys)
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            # Unknown qualities rank below every known one
            return -1
    return q
4422 'default': '%(title)s [%(id)s].%(ext)s',
4423 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
4429 'description': 'description',
4430 'annotation': 'annotations.xml',
4431 'infojson': 'info.json',
4432 'pl_thumbnail': None,
4433 'pl_description': 'description',
4434 'pl_infojson': 'info.json',
4437 # As of [1] format syntax is:
4438 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4439 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4440 STR_FORMAT_RE_TMPL
= r
'''(?x)
4441 (?<!%)(?P<prefix>(?:%%)*)
4443 (?P<has_key>\((?P<key>{0})\))? # mapping key
4445 (?:[#0\-+ ]+)? # conversion flags (optional)
4446 (?:\d+)? # minimum field width (optional)
4447 (?:\.\d+)? # precision (optional)
4448 [hlL]? # length modifier (optional)
4449 {1} # conversion type
4454 STR_FORMAT_TYPES
= 'diouxXeEfFgGcrs'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
def is_outdated_version(version, limit, assume_new=True):
    """Compare version strings; empty/unparseable versions fall back to assume_new."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
4480 def ytdl_is_updateable():
4481 """ Returns if yt-dlp can be updated with -U """
4484 from zipimport
import zipimporter
4486 return isinstance(globals().get('__loader__'), zipimporter
) or hasattr(sys
, 'frozen')
def args_to_str(args):
    """Get a short, shell-quoted string representation for a subprocess command."""
    return ' '.join(map(compat_shlex_quote, args))
def error_to_compat_str(err):
    """Return the message of an exception as text, decoding it on Python 2."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
4503 def mimetype2ext(mt
):
4509 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4510 # it's the most popular one
4511 'audio/mpeg': 'mp3',
4512 'audio/x-wav': 'wav',
4517 _
, _
, res
= mt
.rpartition('/')
4518 res
= res
.split(';')[0].strip().lower()
4522 'smptett+xml': 'tt',
4526 'x-mp4-fragmented': 'mp4',
4527 'x-ms-sami': 'sami',
4530 'x-mpegurl': 'm3u8',
4531 'vnd.apple.mpegurl': 'm3u8',
4535 'vnd.ms-sstr+xml': 'ism',
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}.

    Empty input yields {}; an unrecognized codec triggers a warning on stderr.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # With exactly two unknown entries, assume video,audio ordering
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's Content-Disposition or Content-Type."""
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build a base64 data: URI for the given bytes and MIME type."""
    encoded = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, encoded)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            s = first_bytes[len(bom):].decode(enc, 'replace')
            break
    else:
        # No BOM: assume UTF-8 with replacement for undecodable bytes
        s = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', s)
def determine_protocol(info_dict):
    """Determine the download protocol for a format dict, falling back to the URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def get_max_lens(table):
        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        return [col for (take, col) in zip(filterArray, row) if take]

    if hideEmpty:
        # Drop columns whose data cells are all empty
        max_lens = get_max_lens(data)
        header_row = filter_using_list(header_row, max_lens)
        data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    if delim:
        # Insert a dashed separator line under the header
        table = [header_row] + [['-' * ml for ml in max_lens]] + data
    format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
def _match_one(filter_part, dct):
    """Evaluate a single comparison or unary filter expression against dct."""
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        unnegated_op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quote characters that were escaped inside the value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            if m.group('op') in STRING_OPERATORS:
                raise ValueError('Operator %s only supports string values!' % m.group('op'))
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Allow suffixed sizes like 500k / 1.2MiB
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    return all(
        _match_one(filter_part.replace(r'\&', '&'), dct)
        for filter_part in re.split(r'(?<!\\)&', filter_str))
def match_filter_func(filter_str):
    """Build a match_filter callable from a filter string.

    The returned callable yields None when the video passes, or a
    human-readable skip message otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        else:
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float), or None."""
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a seconds offset as an SRT timecode (HH:MM:SS,mmm)."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4781 def dfxp2srt(dfxp_data):
4783 @param dfxp_data A
bytes-like
object containing DFXP data
4784 @returns A
unicode object containing converted SRT data
4786 LEGACY_NAMESPACES = (
4787 (b'http://www.w3.org/ns/ttml', [
4788 b'http://www.w3.org/2004/11/ttaf1',
4789 b'http://www.w3.org/2006/04/ttaf1',
4790 b'http://www.w3.org/2006/10/ttaf1',
4792 (b'http://www.w3.org/ns/ttml#styling', [
4793 b'http://www.w3.org/ns/ttml#style',
4797 SUPPORTED_STYLING = [
4806 _x = functools.partial(xpath_with_ns, ns_map={
4807 'xml': 'http://www.w3.org/XML/1998/namespace',
4808 'ttml': 'http://www.w3.org/ns/ttml',
4809 'tts': 'http://www.w3.org/ns/ttml#styling',
4815 class TTMLPElementParser(object):
4817 _unclosed_elements = []
4818 _applied_styles = []
4820 def start(self, tag, attrib):
4821 if tag in (_x('ttml:br'), 'br'):
4824 unclosed_elements = []
4826 element_style_id = attrib.get('style')
4828 style.update(default_style)
4829 if element_style_id:
4830 style.update(styles.get(element_style_id, {}))
4831 for prop in SUPPORTED_STYLING:
4832 prop_val = attrib.get(_x('tts:' + prop))
4834 style[prop] = prop_val
4837 for k, v in sorted(style.items()):
4838 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4841 font += ' color="%s"' % v
4842 elif k == 'fontSize':
4843 font += ' size="%s"' % v
4844 elif k == 'fontFamily':
4845 font += ' face="%s"' % v
4846 elif k == 'fontWeight' and v == 'bold':
4848 unclosed_elements.append('b')
4849 elif k == 'fontStyle' and v == 'italic':
4851 unclosed_elements.append('i')
4852 elif k == 'textDecoration' and v == 'underline':
4854 unclosed_elements.append('u')
4856 self._out += '<font' + font + '>'
4857 unclosed_elements.append('font')
4859 if self._applied_styles:
4860 applied_style.update(self._applied_styles[-1])
4861 applied_style.update(style)
4862 self._applied_styles.append(applied_style)
4863 self._unclosed_elements.append(unclosed_elements)
4866 if tag not in (_x('ttml:br'), 'br'):
4867 unclosed_elements = self._unclosed_elements.pop()
4868 for element in reversed(unclosed_elements):
4869 self._out += '</%s>' % element
4870 if unclosed_elements and self._applied_styles:
4871 self._applied_styles.pop()
4873 def data(self, data):
4877 return self._out.strip()
4879 def parse_node(node):
4880 target = TTMLPElementParser()
4881 parser = xml.etree.ElementTree.XMLParser(target=target)
4882 parser.feed(xml.etree.ElementTree.tostring(node))
4883 return parser.close()
4885 for k, v in LEGACY_NAMESPACES:
4887 dfxp_data = dfxp_data.replace(ns, k)
4889 dfxp = compat_etree_fromstring(dfxp_data)
4891 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4894 raise ValueError('Invalid dfxp/TTML subtitle')
4898 for style in dfxp.findall(_x('.//ttml:style')):
4899 style_id = style.get('id') or style.get(_x('xml:id'))
4902 parent_style_id = style.get('style')
4904 if parent_style_id not in styles:
4907 styles[style_id] = styles[parent_style_id].copy()
4908 for prop in SUPPORTED_STYLING:
4909 prop_val = style.get(_x('tts:' + prop))
4911 styles.setdefault(style_id, {})[prop] = prop_val
4917 for p in ('body', 'div'):
4918 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4921 style = styles.get(ele.get('style'))
4924 default_style.update(style)
4926 for para, index in zip(paras, itertools.count(1)):
4927 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4928 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4929 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4930 if begin_time is None:
4935 end_time = begin_time + dur
4936 out.append('%d\n%s --> %s\n%s\n\n' % (
4938 srt_subtitles_timecode(begin_time),
4939 srt_subtitles_timecode(end_time),
def cli_option(params, command_option, param):
    """Return [command_option, value] for a set parameter, else []."""
    param = params.get(param)
    if param:
        param = compat_str(param)
    return [command_option, param] if param is not None else []
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Map a boolean parameter to a CLI flag; with `separator`, emit a single joined token."""
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    chosen = true_value if param else false_value
    if separator:
        return [command_option + separator + chosen]
    return [command_option, chosen]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when the parameter equals expected_value, else []."""
    return [command_option] if params.get(param) == expected_value else []
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Select extra CLI args from argdict by trying each key list in order."""
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        else:
            argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        arg_list = list(filter(
            lambda x: x is not None,
            [argdict.get(key.lower()) for key in variadic(key_list)]))
        if arg_list:
            return [arg for args in arg_list for arg in args]
    return default
4987 class ISO639Utils(object):
4988 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5047 'iw': 'heb', # Replaced by he in 1989 revision
5057 'in': 'ind', # Replaced by id in 1989 revision
5172 'ji': 'yid', # Replaced by yi in 1989 revision
5180 def short2long(cls, code):
5181 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5182 return cls._lang_map.get(code[:2])
5185 def long2short(cls, code):
5186 """Convert language code from ISO 639-2/T to ISO 639-1"""
5187 for short_name, long_name in cls._lang_map.items():
5188 if long_name == code:
5192 class ISO3166Utils(object):
5193 # From http://data.okfn.org/data/core/country-list
5195 'AF': 'Afghanistan',
5196 'AX': 'Åland Islands',
5199 'AS': 'American Samoa',
5204 'AG': 'Antigua and Barbuda',
5221 'BO': 'Bolivia, Plurinational State of',
5222 'BQ': 'Bonaire, Sint Eustatius and Saba',
5223 'BA': 'Bosnia and Herzegovina',
5225 'BV': 'Bouvet Island',
5227 'IO': 'British Indian Ocean Territory',
5228 'BN': 'Brunei Darussalam',
5230 'BF': 'Burkina Faso',
5236 'KY': 'Cayman Islands',
5237 'CF': 'Central African Republic',
5241 'CX': 'Christmas Island',
5242 'CC': 'Cocos (Keeling) Islands',
5246 'CD': 'Congo, the Democratic Republic of the',
5247 'CK': 'Cook Islands',
5249 'CI': 'Côte d\'Ivoire',
5254 'CZ': 'Czech Republic',
5258 'DO': 'Dominican Republic',
5261 'SV': 'El Salvador',
5262 'GQ': 'Equatorial Guinea',
5266 'FK': 'Falkland Islands (Malvinas)',
5267 'FO': 'Faroe Islands',
5271 'GF': 'French Guiana',
5272 'PF': 'French Polynesia',
5273 'TF': 'French Southern Territories',
5288 'GW': 'Guinea-Bissau',
5291 'HM': 'Heard Island and McDonald Islands',
5292 'VA': 'Holy See (Vatican City State)',
5299 'IR': 'Iran, Islamic Republic of',
5302 'IM': 'Isle of Man',
5312 'KP': 'Korea, Democratic People\'s Republic of',
5313 'KR': 'Korea, Republic of',
5316 'LA': 'Lao People\'s Democratic Republic',
5322 'LI': 'Liechtenstein',
5326 'MK': 'Macedonia, the Former Yugoslav Republic of',
5333 'MH': 'Marshall Islands',
5339 'FM': 'Micronesia, Federated States of',
5340 'MD': 'Moldova, Republic of',
5351 'NL': 'Netherlands',
5352 'NC': 'New Caledonia',
5353 'NZ': 'New Zealand',
5358 'NF': 'Norfolk Island',
5359 'MP': 'Northern Mariana Islands',
5364 'PS': 'Palestine, State of',
5366 'PG': 'Papua New Guinea',
5369 'PH': 'Philippines',
5373 'PR': 'Puerto Rico',
5377 'RU': 'Russian Federation',
5379 'BL': 'Saint Barthélemy',
5380 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5381 'KN': 'Saint Kitts and Nevis',
5382 'LC': 'Saint Lucia',
5383 'MF': 'Saint Martin (French part)',
5384 'PM': 'Saint Pierre and Miquelon',
5385 'VC': 'Saint Vincent and the Grenadines',
5388 'ST': 'Sao Tome and Principe',
5389 'SA': 'Saudi Arabia',
5393 'SL': 'Sierra Leone',
5395 'SX': 'Sint Maarten (Dutch part)',
5398 'SB': 'Solomon Islands',
5400 'ZA': 'South Africa',
5401 'GS': 'South Georgia and the South Sandwich Islands',
5402 'SS': 'South Sudan',
5407 'SJ': 'Svalbard and Jan Mayen',
5410 'CH': 'Switzerland',
5411 'SY': 'Syrian Arab Republic',
5412 'TW': 'Taiwan, Province of China',
5414 'TZ': 'Tanzania, United Republic of',
5416 'TL': 'Timor-Leste',
5420 'TT': 'Trinidad and Tobago',
5423 'TM': 'Turkmenistan',
5424 'TC': 'Turks and Caicos Islands',
5428 'AE': 'United Arab Emirates',
5429 'GB': 'United Kingdom',
5430 'US': 'United States',
5431 'UM': 'United States Minor Outlying Islands',
5435 'VE': 'Venezuela, Bolivarian Republic of',
5437 'VG': 'Virgin Islands, British',
5438 'VI': 'Virgin Islands, U.S.',
5439 'WF': 'Wallis and Futuna',
5440 'EH': 'Western Sahara',
5447 def short2full(cls, code):
5448 """Convert an ISO 3166-2 country code to the corresponding full name"""
5449 return cls._country_map.get(code.upper())
5452 class GeoUtils(object):
5453 # Major IPv4 address blocks per country
5455 'AD': '46.172.224.0/19',
5456 'AE': '94.200.0.0/13',
5457 'AF': '149.54.0.0/17',
5458 'AG': '209.59.64.0/18',
5459 'AI': '204.14.248.0/21',
5460 'AL': '46.99.0.0/16',
5461 'AM': '46.70.0.0/15',
5462 'AO': '105.168.0.0/13',
5463 'AP': '182.50.184.0/21',
5464 'AQ': '23.154.160.0/24',
5465 'AR': '181.0.0.0/12',
5466 'AS': '202.70.112.0/20',
5467 'AT': '77.116.0.0/14',
5468 'AU': '1.128.0.0/11',
5469 'AW': '181.41.0.0/18',
5470 'AX': '185.217.4.0/22',
5471 'AZ': '5.197.0.0/16',
5472 'BA': '31.176.128.0/17',
5473 'BB': '65.48.128.0/17',
5474 'BD': '114.130.0.0/16',
5476 'BF': '102.178.0.0/15',
5477 'BG': '95.42.0.0/15',
5478 'BH': '37.131.0.0/17',
5479 'BI': '154.117.192.0/18',
5480 'BJ': '137.255.0.0/16',
5481 'BL': '185.212.72.0/23',
5482 'BM': '196.12.64.0/18',
5483 'BN': '156.31.0.0/16',
5484 'BO': '161.56.0.0/16',
5485 'BQ': '161.0.80.0/20',
5486 'BR': '191.128.0.0/12',
5487 'BS': '24.51.64.0/18',
5488 'BT': '119.2.96.0/19',
5489 'BW': '168.167.0.0/16',
5490 'BY': '178.120.0.0/13',
5491 'BZ': '179.42.192.0/18',
5492 'CA': '99.224.0.0/11',
5493 'CD': '41.243.0.0/16',
5494 'CF': '197.242.176.0/21',
5495 'CG': '160.113.0.0/16',
5496 'CH': '85.0.0.0/13',
5497 'CI': '102.136.0.0/14',
5498 'CK': '202.65.32.0/19',
5499 'CL': '152.172.0.0/14',
5500 'CM': '102.244.0.0/14',
5501 'CN': '36.128.0.0/10',
5502 'CO': '181.240.0.0/12',
5503 'CR': '201.192.0.0/12',
5504 'CU': '152.206.0.0/15',
5505 'CV': '165.90.96.0/19',
5506 'CW': '190.88.128.0/17',
5507 'CY': '31.153.0.0/16',
5508 'CZ': '88.100.0.0/14',
5510 'DJ': '197.241.0.0/17',
5511 'DK': '87.48.0.0/12',
5512 'DM': '192.243.48.0/20',
5513 'DO': '152.166.0.0/15',
5514 'DZ': '41.96.0.0/12',
5515 'EC': '186.68.0.0/15',
5516 'EE': '90.190.0.0/15',
5517 'EG': '156.160.0.0/11',
5518 'ER': '196.200.96.0/20',
5519 'ES': '88.0.0.0/11',
5520 'ET': '196.188.0.0/14',
5521 'EU': '2.16.0.0/13',
5522 'FI': '91.152.0.0/13',
5523 'FJ': '144.120.0.0/16',
5524 'FK': '80.73.208.0/21',
5525 'FM': '119.252.112.0/20',
5526 'FO': '88.85.32.0/19',
5528 'GA': '41.158.0.0/15',
5530 'GD': '74.122.88.0/21',
5531 'GE': '31.146.0.0/16',
5532 'GF': '161.22.64.0/18',
5533 'GG': '62.68.160.0/19',
5534 'GH': '154.160.0.0/12',
5535 'GI': '95.164.0.0/16',
5536 'GL': '88.83.0.0/19',
5537 'GM': '160.182.0.0/15',
5538 'GN': '197.149.192.0/18',
5539 'GP': '104.250.0.0/19',
5540 'GQ': '105.235.224.0/20',
5541 'GR': '94.64.0.0/13',
5542 'GT': '168.234.0.0/16',
5543 'GU': '168.123.0.0/16',
5544 'GW': '197.214.80.0/20',
5545 'GY': '181.41.64.0/18',
5546 'HK': '113.252.0.0/14',
5547 'HN': '181.210.0.0/16',
5548 'HR': '93.136.0.0/13',
5549 'HT': '148.102.128.0/17',
5550 'HU': '84.0.0.0/14',
5551 'ID': '39.192.0.0/10',
5552 'IE': '87.32.0.0/12',
5553 'IL': '79.176.0.0/13',
5554 'IM': '5.62.80.0/20',
5555 'IN': '117.192.0.0/10',
5556 'IO': '203.83.48.0/21',
5557 'IQ': '37.236.0.0/14',
5558 'IR': '2.176.0.0/12',
5559 'IS': '82.221.0.0/16',
5560 'IT': '79.0.0.0/10',
5561 'JE': '87.244.64.0/18',
5562 'JM': '72.27.0.0/17',
5563 'JO': '176.29.0.0/16',
5564 'JP': '133.0.0.0/8',
5565 'KE': '105.48.0.0/12',
5566 'KG': '158.181.128.0/17',
5567 'KH': '36.37.128.0/17',
5568 'KI': '103.25.140.0/22',
5569 'KM': '197.255.224.0/20',
5570 'KN': '198.167.192.0/19',
5571 'KP': '175.45.176.0/22',
5572 'KR': '175.192.0.0/10',
5573 'KW': '37.36.0.0/14',
5574 'KY': '64.96.0.0/15',
5575 'KZ': '2.72.0.0/13',
5576 'LA': '115.84.64.0/18',
5577 'LB': '178.135.0.0/16',
5578 'LC': '24.92.144.0/20',
5579 'LI': '82.117.0.0/19',
5580 'LK': '112.134.0.0/15',
5581 'LR': '102.183.0.0/16',
5582 'LS': '129.232.0.0/17',
5583 'LT': '78.56.0.0/13',
5584 'LU': '188.42.0.0/16',
5585 'LV': '46.109.0.0/16',
5586 'LY': '41.252.0.0/14',
5587 'MA': '105.128.0.0/11',
5588 'MC': '88.209.64.0/18',
5589 'MD': '37.246.0.0/16',
5590 'ME': '178.175.0.0/17',
5591 'MF': '74.112.232.0/21',
5592 'MG': '154.126.0.0/17',
5593 'MH': '117.103.88.0/21',
5594 'MK': '77.28.0.0/15',
5595 'ML': '154.118.128.0/18',
5596 'MM': '37.111.0.0/17',
5597 'MN': '49.0.128.0/17',
5598 'MO': '60.246.0.0/16',
5599 'MP': '202.88.64.0/20',
5600 'MQ': '109.203.224.0/19',
5601 'MR': '41.188.64.0/18',
5602 'MS': '208.90.112.0/22',
5603 'MT': '46.11.0.0/16',
5604 'MU': '105.16.0.0/12',
5605 'MV': '27.114.128.0/18',
5606 'MW': '102.70.0.0/15',
5607 'MX': '187.192.0.0/11',
5608 'MY': '175.136.0.0/13',
5609 'MZ': '197.218.0.0/15',
5610 'NA': '41.182.0.0/16',
5611 'NC': '101.101.0.0/18',
5612 'NE': '197.214.0.0/18',
5613 'NF': '203.17.240.0/22',
5614 'NG': '105.112.0.0/12',
5615 'NI': '186.76.0.0/15',
5616 'NL': '145.96.0.0/11',
5617 'NO': '84.208.0.0/13',
5618 'NP': '36.252.0.0/15',
5619 'NR': '203.98.224.0/19',
5620 'NU': '49.156.48.0/22',
5621 'NZ': '49.224.0.0/14',
5622 'OM': '5.36.0.0/15',
5623 'PA': '186.72.0.0/15',
5624 'PE': '186.160.0.0/14',
5625 'PF': '123.50.64.0/18',
5626 'PG': '124.240.192.0/19',
5627 'PH': '49.144.0.0/13',
5628 'PK': '39.32.0.0/11',
5629 'PL': '83.0.0.0/11',
5630 'PM': '70.36.0.0/20',
5631 'PR': '66.50.0.0/16',
5632 'PS': '188.161.0.0/16',
5633 'PT': '85.240.0.0/13',
5634 'PW': '202.124.224.0/20',
5635 'PY': '181.120.0.0/14',
5636 'QA': '37.210.0.0/15',
5637 'RE': '102.35.0.0/16',
5638 'RO': '79.112.0.0/13',
5639 'RS': '93.86.0.0/15',
5640 'RU': '5.136.0.0/13',
5641 'RW': '41.186.0.0/16',
5642 'SA': '188.48.0.0/13',
5643 'SB': '202.1.160.0/19',
5644 'SC': '154.192.0.0/11',
5645 'SD': '102.120.0.0/13',
5646 'SE': '78.64.0.0/12',
5647 'SG': '8.128.0.0/10',
5648 'SI': '188.196.0.0/14',
5649 'SK': '78.98.0.0/15',
5650 'SL': '102.143.0.0/17',
5651 'SM': '89.186.32.0/19',
5652 'SN': '41.82.0.0/15',
5653 'SO': '154.115.192.0/18',
5654 'SR': '186.179.128.0/17',
5655 'SS': '105.235.208.0/21',
5656 'ST': '197.159.160.0/19',
5657 'SV': '168.243.0.0/16',
5658 'SX': '190.102.0.0/20',
5660 'SZ': '41.84.224.0/19',
5661 'TC': '65.255.48.0/20',
5662 'TD': '154.68.128.0/19',
5663 'TG': '196.168.0.0/14',
5664 'TH': '171.96.0.0/13',
5665 'TJ': '85.9.128.0/18',
5666 'TK': '27.96.24.0/21',
5667 'TL': '180.189.160.0/20',
5668 'TM': '95.85.96.0/19',
5669 'TN': '197.0.0.0/11',
5670 'TO': '175.176.144.0/21',
5671 'TR': '78.160.0.0/11',
5672 'TT': '186.44.0.0/15',
5673 'TV': '202.2.96.0/19',
5674 'TW': '120.96.0.0/11',
5675 'TZ': '156.156.0.0/14',
5676 'UA': '37.52.0.0/14',
5677 'UG': '102.80.0.0/13',
5679 'UY': '167.56.0.0/13',
5680 'UZ': '84.54.64.0/18',
5681 'VA': '212.77.0.0/19',
5682 'VC': '207.191.240.0/21',
5683 'VE': '186.88.0.0/13',
5684 'VG': '66.81.192.0/20',
5685 'VI': '146.226.0.0/16',
5686 'VN': '14.160.0.0/11',
5687 'VU': '202.80.32.0/20',
5688 'WF': '117.20.32.0/21',
5689 'WS': '202.4.32.0/19',
5690 'YE': '134.35.0.0/16',
5691 'YT': '41.242.116.0/22',
5692 'ZA': '41.0.0.0/11',
5693 'ZM': '102.144.0.0/13',
5694 'ZW': '102.177.192.0/18',
5698 def random_ipv4(cls, code_or_block):
5699 if len(code_or_block) == 2:
5700 block = cls._country_ip_map.get(code_or_block.upper())
5704 block = code_or_block
5705 addr, preflen = block.split('/')
5706 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5707 addr_max = addr_min | (0xffffffff >> int(preflen))
5708 return compat_str(socket.inet_ntoa(
5709 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler honouring a per-request 'Ytdl-request-proxy' header.

    The header value overrides the handler-level proxy; the special value
    '__noproxy__' disables proxying for that request, and socks*:// proxies
    are forwarded via the 'Ytdl-socks-proxy' header instead of being opened
    here.
    """

    def __init__(self, proxies=None):
        # Install default http/https open methods routed through proxy_open
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers perform the actual socks wrapping
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of
    the byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # Emit the integer 32 bits at a time, most significant word first.
    packed = b''
    n = int(n)
    while n > 0:
        packed = compat_struct_pack('>I', n & 0xffffffff) + packed
        n >>= 32
    # Drop leading NUL bytes; keep a single NUL when n was zero.
    stripped = packed.lstrip(b'\000')
    if not stripped:
        stripped = b'\000'
    # Front-pad so the length is a multiple of blocksize, if requested.
    if blocksize > 0 and len(stripped) % blocksize:
        stripped = (blocksize - len(stripped) % blocksize) * b'\000' + stripped
    return stripped
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    result = 0
    length = len(s)
    remainder = length % 4
    if remainder:
        # Front-pad with NULs so the length is a whole number of 32-bit words.
        pad = 4 - remainder
        s = b'\000' * pad + s
        length += pad
    for offset in range(0, length, 4):
        result = (result << 32) + compat_struct_unpack('>I', s[offset:offset + 4])[0]
    return result
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # OHDave's JS implementation treats the data as a little-endian
    # integer, hence the [::-1] reversal before hex conversion.
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data        input data
    @param {int} length        target length
    @returns {int[]}           padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # EME-PKCS1-v1_5 encoding: 0x00 || 0x02 || PS || 0x00 || message.
    # RFC 8017 section 7.2.1 requires the pseudo-random padding string PS
    # to consist of *non-zero* octets — randint(1, 254) guarantees that
    # (the original randint(0, 254) could emit a stray zero, which would
    # truncate the message at decryption time).
    pseudo_random = [random.randint(1, 254) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer num in base n.

    Digits are taken from table (defaulting to the first n characters of
    0-9a-zA-Z); raises ValueError when the table is too short for the base.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
def decode_packed_codes(code):
    """Decode JavaScript obfuscated with the P.A.C.K.E.R. packer."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Build the mapping from base-n token back to its original symbol;
    # an empty symbol means the token stands for itself.
    symbol_table = {}
    for idx in range(count):
        token = encode_base_n(idx, base)
        symbol_table[token] = symbols[idx] or token

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
def caesar(s, alphabet, shift):
    """Shift every character of s that appears in alphabet by shift places
    (wrapping around); characters outside the alphabet pass through."""
    if shift == 0:
        return s
    size = len(alphabet)
    out = []
    for ch in s:
        if ch in alphabet:
            out.append(alphabet[(alphabet.index(ch) + shift) % size])
        else:
            out.append(ch)
    return ''.join(out)
def rot47(s):
    # ROT47: rotate the 94 printable ASCII characters by 47 positions.
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=val,KEY="quoted",...') into a dict,
    stripping surrounding double quotes from quoted values."""
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: (val[1:-1] if val.startswith('"') else val)
        for key, val in pairs}
def urshift(val, n):
    """Unsigned 32-bit right shift, like JavaScript's '>>>' operator."""
    if val >= 0:
        return val >> n
    # Map a negative value to its unsigned 32-bit representation first.
    return (val + 0x100000000) >> n
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    # Reference: https://www.w3.org/TR/PNG/
    # Returns (width, height, pixels) where pixels is a list of rows, each
    # row being a flat list of byte values (3 bytes per pixel).
    header = png_data[8:]

    # Validate the 8-byte PNG signature and that the first chunk is IHDR.
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned integer of 1, 2 or 4 bytes.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: 4-byte length, 4-byte type, data, CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is always the first chunk; it carries the image dimensions.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Concatenate all IDAT chunks into one zlib stream.
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 bytes per pixel (truecolour, no alpha assumed).
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Look up an already-reconstructed byte by flat index.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each scanline is prefixed by a one-byte filter type.
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbours used by the filters; 0 outside the image.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter (PNG spec, 'Filtering').
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Pick the predictor closest to p (ties favour a, then b).
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
def write_xattr(path, key, value):
    # Write the extended attribute `key` = `value` (bytes) on file `path`.
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # No pyxattr: fall back to the setfattr/xattr CLI tools.
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:
                # The CLI tools take the value as text, not bytes.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the year, month and
    day (as strings) of a random date between 1950-01-01 and 1995-12-31."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    picked = first + datetime.timedelta(days=random.randint(0, (last - first).days))
    return {
        year_field: str(picked.year),
        month_field: str(picked.month),
        day_field: str(picked.day),
    }
# Templates for internet shortcut files, which are plain text files.
# Windows .url shortcut.
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS .webloc shortcut (an XML property list).
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org .desktop link entry (Linux).
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
def to_high_limit_path(path):
    """On Windows, return the absolute path with an extended-length prefix
    to work around the MAX_PATH limitation; elsewhere return path as-is.
    The maximum allowed length for individual path segments may still be
    quite limited."""
    if sys.platform in ('win32', 'cygwin'):
        return r'\\?\ '.rstrip() + os.path.abspath(path)
    return path
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch obj[field] and render it with template.

    Values in `ignore` (before or after applying func) yield `default`
    instead; func, when given, transforms the value before formatting.
    """
    val = obj.get(field, default)
    if val not in ignore and func:
        val = func(val)
    if val in ignore:
        return default
    return template % val
def clean_podcast_url(url):
    """Strip known podcast tracking/analytics redirect prefixes from url."""
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
# Hex digits used when generating random UUID nibbles.
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Generate a random RFC 4122 version-4 UUID string.

    Each 'x' position gets a uniformly random hex digit; the 'y' position
    carries the variant bits and must be one of 8, 9, a or b (RFC 4122
    section 4.4) — the original drew it from all 16 digits, producing
    invalid variants three quarters of the time.
    """
    return re.sub(
        r'[xy]',
        lambda m: random.choice('89ab') if m.group(0) == 'y' else _HEX_TABLE[random.randint(0, 15)],
        'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """Create the parent directory of path if it does not already exist.

    Returns True on success (or when there is nothing to do), False on
    failure; on failure the error is reported through to_screen when a
    callable is supplied.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Bug fix: the original tested `callable(to_screen) is not None`,
        # which is always True (callable() returns a bool) and crashed
        # with a TypeError when to_screen was None.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
def get_executable_path():
    """Best-effort guess at the directory yt-dlp is being run from."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):
        # Running from PyInstaller: next to the frozen executable.
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):
        # Running from a ZIP bundle: two levels above this module.
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        # Plain checkout/install: the package's parent directory.
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
def load_plugins(name, suffix, namespace):
    """Load classes whose names end with suffix from the ytdlp_plugins
    module called name, injecting them into namespace.

    Returns the list of newly loaded classes; a missing plugin module is
    silently ignored. Names already present in namespace are skipped.
    """
    plugin_info = [None]
    classes = []
    try:
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        for attr in dir(plugins):
            if attr in namespace or not attr.endswith(suffix):
                continue
            plugin_class = getattr(plugins, attr)
            classes.append(plugin_class)
            namespace[attr] = plugin_class
    except ImportError:
        pass
    finally:
        # imp.find_module returns an open file object as the first element.
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
6242 obj
, *path_list
, default
=None, expected_type
=None, get_all
=True,
6243 casesense
=True, is_user_input
=False, traverse_string
=False):
6244 ''' Traverse nested list/dict/tuple
6245 @param path_list A list of paths which are checked one by one.
6246 Each path is a list of keys where each key is a string,
6247 a tuple of strings or "...". When a tuple is given,
6248 all the keys given in the tuple are traversed, and
6249 "..." traverses all the keys in the object
6250 @param default Default value to return
6251 @param expected_type Only accept final value of this type (Can also be any callable)
6252 @param get_all Return all the values obtained from a path or only the first one
6253 @param casesense Whether to consider dictionary keys as case sensitive
6254 @param is_user_input Whether the keys are generated from user input. If True,
6255 strings are converted to int/slice if necessary
6256 @param traverse_string Whether to traverse inside strings. If True, any
6257 non-compatible object will also be converted into a string
6261 _lower
= lambda k
: (k
.lower() if isinstance(k
, str) else k
)
6262 path_list
= (map(_lower
, variadic(path
)) for path
in path_list
)
6264 def _traverse_obj(obj
, path
, _current_depth
=0):
6268 path
= tuple(variadic(path
))
6269 for i
, key
in enumerate(path
):
6270 if isinstance(key
, (list, tuple)):
6271 obj
= [_traverse_obj(obj
, sub_key
, _current_depth
) for sub_key
in key
]
6274 obj
= (obj
.values() if isinstance(obj
, dict)
6275 else obj
if isinstance(obj
, (list, tuple, LazyList
))
6276 else str(obj
) if traverse_string
else [])
6278 depth
= max(depth
, _current_depth
)
6279 return [_traverse_obj(inner_obj
, path
[i
+ 1:], _current_depth
) for inner_obj
in obj
]
6280 elif isinstance(obj
, dict) and not (is_user_input
and key
== ':'):
6281 obj
= (obj
.get(key
) if casesense
or (key
in obj
)
6282 else next((v
for k
, v
in obj
.items() if _lower(k
) == key
), None))
6285 key
= (int_or_none(key
) if ':' not in key
6286 else slice(*map(int_or_none
, key
.split(':'))))
6287 if key
== slice(None):
6288 return _traverse_obj(obj
, (..., *path
[i
+ 1:]), _current_depth
)
6289 if not isinstance(key
, (int, slice)):
6291 if not isinstance(obj
, (list, tuple, LazyList
)):
6292 if not traverse_string
:
6301 if isinstance(expected_type
, type):
6302 type_test
= lambda val
: val
if isinstance(val
, expected_type
) else None
6303 elif expected_type
is not None:
6304 type_test
= expected_type
6306 type_test
= lambda val
: val
6308 for path
in path_list
:
6310 val
= _traverse_obj(obj
, path
)
6313 for _
in range(depth
- 1):
6314 val
= itertools
.chain
.from_iterable(v
for v
in val
if v
is not None)
6315 val
= [v
for v
in map(type_test
, val
) if v
is not None]
6317 return val
if get_all
else val
[0]
6319 val
= type_test(val
)
def traverse_dict(dictn, keys, casesense=True):
    '''Deprecated wrapper around traverse_obj, kept for backward
    compatibility only. Do not use in new code.'''
    return traverse_obj(
        dictn, keys, casesense=casesense,
        is_user_input=True, traverse_string=True)
def variadic(x, allowed_types=(str, bytes)):
    """Return x unchanged when it is a non-atomic iterable, otherwise wrap
    it in a 1-tuple. Types in allowed_types (str/bytes by default) are
    treated as atomic even though they are iterable."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)