# ]> jfr.im git - yt-dlp.git/blob - yt_dlp/utils.py
# [utils] Fix `InAdvancePagedList.__getitem__`
# [yt-dlp.git] / yt_dlp / utils.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import imp
20 import io
21 import itertools
22 import json
23 import locale
24 import math
25 import operator
26 import os
27 import platform
28 import random
29 import re
30 import socket
31 import ssl
32 import subprocess
33 import sys
34 import tempfile
35 import time
36 import traceback
37 import xml.etree.ElementTree
38 import zlib
39
40 from .compat import (
41 compat_HTMLParseError,
42 compat_HTMLParser,
43 compat_HTTPError,
44 compat_basestring,
45 compat_chr,
46 compat_cookiejar,
47 compat_ctypes_WINFUNCTYPE,
48 compat_etree_fromstring,
49 compat_expanduser,
50 compat_html_entities,
51 compat_html_entities_html5,
52 compat_http_client,
53 compat_integer_types,
54 compat_numeric_types,
55 compat_kwargs,
56 compat_os_name,
57 compat_parse_qs,
58 compat_shlex_quote,
59 compat_str,
60 compat_struct_pack,
61 compat_struct_unpack,
62 compat_urllib_error,
63 compat_urllib_parse,
64 compat_urllib_parse_urlencode,
65 compat_urllib_parse_urlparse,
66 compat_urllib_parse_urlunparse,
67 compat_urllib_parse_quote,
68 compat_urllib_parse_quote_plus,
69 compat_urllib_parse_unquote_plus,
70 compat_urllib_request,
71 compat_urlparse,
72 compat_xpath,
73 )
74
75 from .socks import (
76 ProxyType,
77 sockssocket,
78 )
79
80
def register_socks_protocols():
    """Teach urlparse that SOCKS URL schemes carry a network location.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly, so each SOCKS scheme
    is appended there (at most once).
    """
    registered = compat_urlparse.uses_netloc
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto not in registered:
            registered.append(proto)
88
89
# Type of a compiled regular expression pattern, for isinstance() checks.
# This is not clearly defined otherwise (obtained here by compiling an
# empty pattern and taking its type).
compiled_regex_type = type(re.compile(''))
92
93
def random_user_agent():
    """Return a randomized desktop Chrome-on-Windows-10 User-Agent string.

    A Chrome version is picked uniformly at random (via random.choice)
    from the hard-coded version pool below and substituted into the
    User-Agent template. Not deterministic across calls.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Pool of real Chrome release version numbers (newest first, roughly).
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# Default HTTP headers sent with outgoing requests; the User-Agent is
# chosen once at import time by random_user_agent() above.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1684
1685
# Canned User-Agent strings, keyed by browser name, for callers that need
# to impersonate a specific browser.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1689
1690
# Sentinel that lets callers distinguish "no default supplied" from an
# explicit default of None.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Extensions of common audio/video/playlist container formats.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII transliteration (1 or 2 letters).
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1723
# strptime() patterns tried when parsing free-form textual dates.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Same patterns, plus ambiguous numeric forms read day-first (European style).
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Same patterns, plus ambiguous numeric forms read month-first (US style).
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of "packed" (p,a,c,k,e,d-style) JavaScript code.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches an embedded <script type="application/ld+json"> block.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1785
1786
def preferredencoding():
    """Return the best guess at the system's preferred text encoding.

    Probes locale.getpreferredencoding() and verifies that the reported
    codec actually works; falls back to UTF-8 when it is missing or broken.
    """
    try:
        encoding = locale.getpreferredencoding()
        # A round-trip encode proves the codec is usable.
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'
    return encoding
1800
1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Create the temp file next to the target so the final os.rename
    # stays on one filesystem (and is therefore atomic on POSIX).
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files mode 0600; re-apply the
            # process umask so the result looks like a normal file.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file; re-raise the original error.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath[@key] (resp. xpath[@key='val'])."""
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    # Python 2.6 ElementTree does not support attribute predicates,
    # so scan the candidates manually.
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath[@key] (resp. xpath[@key='val'])."""
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1876
1877 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1878 # the namespace parameter
1879
1880
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an xpath into '{uri}tag' using ns_map."""
    def expand(component):
        parts = component.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(component) for component in path.split('/'))
1891
1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` (a string or list of xpaths).

    On no match: return `default` when supplied, raise ExtractorError when
    `fatal`, otherwise return None.
    """
    def _search(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        n = _search(xpath)
    else:
        for xp in xpath:
            n = _search(xp)
            if n is not None:
                break

    if n is not None:
        return n
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (
            xpath if name is None else name))
    return None
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (
            xpath if name is None else name))
    return None
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Find an element via find_xpath_attr() and return its `key` attribute."""
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % (
            '%s[@%s]' % (xpath, key) if name is None else name))
    return None
1942
1943
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper: an id lookup is just an attribute lookup on 'id'.
    return get_element_by_attribute('id', id, html)
1947
1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose `attribute` matches `value`."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match the class anywhere inside the (space-separated) class attribute.
    class_value = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the inner content of every tag whose `attribute` matches `value`."""
    if escape_value:
        value = re.escape(value)

    results = []
    for match in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = match.group('content')

        # The optional quotes in the pattern may leave a stray quote pair
        # around the content; strip it.
        if content.startswith(('"', "'")):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1990
1991
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the last start tag seen; empty until feed() meets one.
        self.attrs = {}
        # Old-style base-class call: compat_HTMLParser may be an old-style
        # class on Python 2, so super() cannot be used here.
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Callback from HTMLParser: keep only the attribute list, as a dict.
        self.attrs = dict(attrs)
2001
2002
def extract_attributes(html_element):
    """Parse the attributes out of a single HTML start-tag string.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    decode and return a dictionary of attributes:
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # return whatever was gathered so far (best effort).
        pass
    return attr_parser.attrs
2027
2028
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return None

    # Drop source newlines, then turn <br> and </p><p> into real newlines.
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Remove any remaining tags, then decode HTML entities.
    html = re.sub('<.*?>', '', html)
    return unescapeHTML(html).strip()
2044
2045
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout into binary mode so written bytes are not mangled.
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission error will not be fixed by renaming; propagate it.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2076
2077
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a parseable RFC 2822 date.
        return None
    return email.utils.mktime_tz(parsed)
2085
2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Convert ':' inside timestamps (e.g. 12:34:56) before per-char handling.
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[1:]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2128
2129
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        # Non-Windows and not forced: nothing to sanitize.
        return s

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)
    sanitized = []
    for part in parts:
        if part in ('.', '..'):
            sanitized.append(part)
        else:
            # Replace Windows-forbidden chars and trailing dots/spaces.
            sanitized.append(re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part))
    if drive_or_unc:
        sanitized.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized.insert(0, os.path.sep)
    return os.path.join(*sanitized)
2153
2154
def sanitize_url(url):
    """Fix protocol-less URLs and repair common scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:' + url
    # Scheme typos seen in the wild, mapped to their corrections.
    scheme_typos = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo, correction in scheme_typos:
        fixed, n = re.subn(typo, correction, url)
        if n:
            return fixed
    return url
2171
2172
def extract_basic_auth(url):
    """Strip inline credentials from `url`.

    Returns (clean_url, authorization_header_value-or-None).
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    if parts.port is None:
        netloc = parts.hostname
    else:
        netloc = '%s:%d' % (parts.hostname, parts.port)
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
    return url, 'Basic ' + token
2183
2184
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from a sanitized/escaped URL, moving inline
    credentials into an Authorization header."""
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2191
2192
def expand_path(s):
    """Expand shell variables and ~"""
    # '~' is expanded first, then environment variables.
    return os.path.expandvars(compat_expanduser(s))
2196
2197
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    # Membership is tested against a list (not a set) so unhashable
    # elements work too; order of first occurrence is preserved.
    deduped = []
    for item in iterable:
        if item not in deduped:
            deduped.append(item)
    return deduped
2205
2206
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        numstr = numeric.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2236
2237
def unescapeHTML(s):
    """Replace HTML entities in `s` with their characters; None passes through."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace(mobj):
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', _replace, s)
2245
2246
def escapeHTML(text):
    """Escape the five HTML-special characters in `text`."""
    # '&' must be replaced first so already-produced entities are not mangled.
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for raw, escaped in replacements:
        text = text.replace(raw, escaped)
    return text
2256
2257
def process_communicate_or_kill(p, *args, **kwargs):
    # Like Popen.communicate(), but make sure the subprocess does not
    # outlive us if communication is aborted: kill it, reap it, re-raise.
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2265
2266
def get_subprocess_encoding():
    """Return the encoding used to pass strings to/from subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # Elsewhere use the filesystem encoding, defaulting to UTF-8.
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2277
2278
def encodeFilename(s, for_subprocess=False):
    """
    Encode a text filename for the OS / subprocess APIs.

    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    # Python 2 on other platforms: encode with the subprocess encoding.
    return s.encode(get_subprocess_encoding(), 'ignore')
2301
2302
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename to text on Python 2; pass through otherwise."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2312
2313
def encodeArgument(s):
    """Encode a subprocess/command-line argument (see encodeFilename)."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2321
2322
def decodeArgument(b):
    """Inverse of encodeArgument(): decode a subprocess argument to text."""
    return decodeFilename(b, True)
2325
2326
def decodeOption(optval):
    """Decode a (possibly byte-string) command-line option value to text."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        # Python 2 may hand us bytes from argv; decode via the locale encoding.
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2335
2336
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [h<delim>]m<delim>ss[.mmm].

    @param secs   Duration in seconds (int or float)
    @param delim  Separator between the hour/minute/second fields
    @param msec   If true, append the milliseconds as '.NNN'
    """
    # Use >= so exactly one hour/minute rolls over into the larger unit
    # ('1:00:00' rather than '60:00').
    if secs >= 3600:
        ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        ret = '%d' % secs
    # Bug fix: the fractional part must be scaled to milliseconds before
    # '%03d' truncates it — '%03d' % (secs % 1) always yielded '000'.
    return '%s.%03d' % (ret, secs % 1 * 1000) if msec else ret
2345
2346
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler with certificate checking configured
    from params['nocheckcertificate'], across the supported Python versions."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname must be disabled before verify_mode can be CERT_NONE
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        # Old Pythons: no usable SSLContext support in HTTPSHandler.
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2370
2371
def bug_reports_message(before=';'):
    """Return the standard 'please report this issue' blurb, joined to `before`."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp .'
           ' Make sure you are using the latest version; %s.'
           ' Be sure to call yt-dlp with the --verbose flag and include its complete output.') % update_cmd

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # Starting a new sentence: capitalize the first word.
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg
2386
2387
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2391
2392
# Exception types treated as network-level failures; ssl.CertificateError is
# included only where the ssl module provides it.
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2397
2398
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            # Network failures are never the extractor's fault.
            expected = True

        prefix = '' if video_id is None else video_id + ': '
        suffix = ' (caused by %r)' % cause if cause else ''
        msg = prefix + msg + suffix
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Render the stored traceback as a string, or None if absent."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2426
2427
class UnsupportedError(ExtractorError):
    """Raised for URLs no extractor can handle."""

    def __init__(self, url):
        self.url = url
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
2433
2434
class RegexNotFoundError(ExtractorError):
    """Raised when a mandatory regex search found no match."""
    pass
2438
2439
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        self.msg = msg
        # Country codes from which the video is accessible, if known.
        self.countries = countries
        super(GeoRestrictedError, self).__init__(msg, expected=True)
2451
2452
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
2465
2466
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    pass
2474
2475
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    that multiple files would have to be downloaded to the same file on disk.
    """
    pass
2483
2484
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
2495
2496
class ExistingVideoReached(YoutubeDLError):
    """ --break-on-existing triggered: an already-downloaded video was reached. """
    pass
2500
2501
class RejectedVideoReached(YoutubeDLError):
    """ --break-on-reject triggered: a filtered/rejected video was reached. """
    pass
2505
2506
class ThrottledDownload(YoutubeDLError):
    """ Raised when the download speed falls below --throttled-rate. """
    pass
2510
2511
class MaxDownloadsReached(YoutubeDLError):
    """ Raised once the --max-downloads limit has been reached. """
    pass
2515
2516
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2524
2525
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both sizes are in bytes.
        self.downloaded = downloaded
        self.expected = expected
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
2541
2542
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing an extended file attribute fails."""

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Derive a coarse failure reason from the errno / message text.
        if self.code in (errno.ENOSPC, errno.EDQUOT) or any(
                s in self.msg for s in ('No space left', 'Disk quota exceeded')):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2557
2558
class XAttrUnavailableError(YoutubeDLError):
    # Companion to XAttrMetadataError; presumably raised when extended-attribute
    # support itself is unavailable — raiser not visible in this file.
    pass
2561
2562
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection, honouring the configured source_address.

    Used as the connection factory by the YoutubeDL urllib handlers (see
    YoutubeDLHandler.http_open below).
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Keep only remote addresses of the same family as the source address.
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate address until one connects.
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: replace connect() entirely.
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2626
2627
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, any Accept-Encoding header is dropped too so
    the request is made uncompressed; otherwise `headers` is returned as-is.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    filtered = dict((k, v) for k, v in headers.items() if k.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2636
2637
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # params: the YoutubeDL options dict, stored for subclasses/callers
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open a plain-HTTP connection, optionally tunnelled through a
        SOCKS proxy given via the internal 'Ytdl-socks-proxy' header."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            # Swap in a SOCKS-capable connection class and strip the
            # internal header so it is never sent on the wire
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress a 'deflate'-encoded body; handles both raw deflate
        streams and zlib-wrapped ones."""
        if not data:
            return data
        try:
            # Raw deflate stream (no zlib header), as some servers send
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            # Standard zlib-wrapped stream
            return zlib.decompress(data)

    def http_request(self, req):
        """Prepare an outgoing request: percent-encode the URL, add the
        standard headers and strip internal marker headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Transparently decode gzip/deflate response bodies and
        re-escape the Location header of redirect responses."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
2761
2762
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* whose connect() goes
    through the SOCKS proxy described by the URL *socks_proxy*."""
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4

    def unquote_if_non_empty(s):
        # Credentials are percent-encoded in the proxy URL; empty/None
        # values pass through untouched
        return compat_urllib_parse_unquote_plus(s) if s else s

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the tunnelled socket in TLS as well
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2804
2805
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler supporting a custom connection class and
    per-request SOCKS proxying via the 'Ytdl-socks-proxy' header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class
        kwargs = {}

        # Forward SSL context / hostname checking where this Python
        # version supports them
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            # Route through SOCKS and drop the internal marker header
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
2829
2830
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar with UTF-8 support, HttpOnly-prefix tolerance and
    session-cookie handling suitable for cookies.txt files.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly cookies in Netscape-format cookie files
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    # Parsed representation of one cookies.txt data line
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Normalize/validate one line; raises LoadError on bad entries
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Skip malformed entries rather than failing the load
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2947
2948
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor used by yt-dlp.

    Currently delegates to the stdlib HTTPCookieProcessor and wires the
    HTTPS hooks to the HTTP ones; kept as a hook point for response
    post-processing (see the retained, disabled workaround below).
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2971
2972
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Reuse the stdlib 302 machinery for all redirect-capable codes,
    # including 308 which older CPython versions do not handle
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only GET/HEAD may follow any redirect code; POST only 301/302/303
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Drop body-describing headers: the redirected request is bodyless
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3028
3029
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (utc_offset, remainder): utc_offset is a datetime.timedelta
    (zero when there is no designator or it is 'Z'), remainder is the
    string with the designator removed.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if m is None:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    sign = m.group('sign')
    if not sign:
        # A bare 'Z' suffix means UTC: zero offset
        return datetime.timedelta(), date_str
    direction = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
3046
3047
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date, or None if it is
    None or cannot be parsed. """
    if date_str is None:
        return None

    # strptime here cannot consume fractional seconds; drop them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
    except ValueError:
        return None
    return calendar.timegm(dt.timetuple())
3065
3066
def date_formats(day_first=True):
    """Return the candidate strptime formats, preferring day-first or
    month-first interpretation of ambiguous dates."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3069
3070
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD, or None."""
    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # Try every known format; the last one that matches wins
    upload_date = None
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to the RFC 2822 parser from the email package
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    return compat_str(upload_date) if upload_date is not None else None
3097
3098
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp for *date_str*, or None if it cannot be
    parsed.

    day_first: interpret ambiguous numeric dates as DD/MM (True) or
    MM/DD (False).
    """
    if date_str is None:
        return None

    # Commas and pipes never carry meaning in dates
    date_str = re.sub(r'[,|]', '', date_str)

    # Remember a PM marker before it is stripped below
    # NOTE(review): any 'PM' adds 12 hours, including '12 PM' — confirm intended
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to the RFC 2822 parser; re-apply the PM shift in seconds
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3130
3131
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    # Text after the last dot, with any query string stripped first
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = guess.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3143
3144
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle filename by using '<lang>.<format>' as the new
    extension of *filename*."""
    new_ext = '.'.join((sub_lang, sub_format))
    return replace_extension(filename, new_ext, expected_real_ext)
3147
3148
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        # With 'auto', keep full precision here and round at the end
        # using the unit found in the expression
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Recursively resolve the base part ('now', a date, or a nested
        # expression), then apply the signed offset
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months/years have irregular length; use calendar arithmetic
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    # Plain date string: parse with the provided format
    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3189
3190
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
3199
3200
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by a number of months,
    clamping the day to the last day of the target month."""
    total_months = dt.month + months - 1
    year = dt.year + total_months // 12
    month = total_months % 12 + 1
    # Clamp e.g. Jan 31 + 1 month to Feb 28/29
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3208
3209
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    ('microsecond', 'second', 'minute', 'hour' or 'day').
    """
    if precision == 'microsecond':
        return dt  # nothing to round

    step = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    timestamp = calendar.timegm(dt.timetuple())
    # Round half up to the nearest multiple of the step
    rounded = ((timestamp + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
3226
3227
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format; any other
    input is returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
3236
3237
class DateRange(object):
    """Represents a time interval between two dates (inclusive at both ends)"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3267
3268
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Python 2 may return bytes; decode with the locale's encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3277
3278
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C file descriptors to Win32 standard-handle IDs
    # (-11 = STD_OUTPUT_HANDLE, -12 = STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # WriteConsoleW only works on real console handles: reject pipes,
        # files, remote handles and handles without a console mode
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual
        # Plane, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write runs of BMP characters in chunks of at most 1024
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3352
3353
def write_string(s, out=None, encoding=None):
    """Write the text string *s* to *out* (default: sys.stderr) and flush,
    choosing an encoding strategy that works across platforms and
    Python 2/3 stream types."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer the Win32 console API so Unicode comes out right
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3374
3375
def bytes_to_intlist(bs):
    """Convert a bytes-like sequence (or Python 2 str) to a list of
    integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    # Python 2 str: elements are one-character strings
    return [ord(c) for c in bs]
3383
3384
def intlist_to_bytes(xs):
    """Pack a list of integer byte values back into a bytes object."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
3389
3390
# Cross-platform file locking: defines _lock_file(f, exclusive) and
# _unlock_file(f) for the current platform.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure used by Lock/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high DWORDs of the byte-range length: lock the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 = LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3464
3465
class locked_file(object):
    """Context manager wrapping a file object with an OS-level advisory
    lock held for the duration of the `with` block."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Readers share the lock; any writing mode takes it exclusively
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Always close, even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3495
3496
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), defaulting to 'utf-8' when the
    interpreter reports None."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3500
3501
def shell_quote(args):
    """Return *args* quoted and joined into a single shell-safe string."""
    encoding = get_filesystem_encoding()

    def to_text(a):
        # We may get a filename encoded with 'encodeFilename'
        return a.decode(encoding) if isinstance(a, bytes) else a

    return ' '.join(compat_shlex_quote(to_text(a)) for a in args)
3511
3512
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL
    url, existing_data = unsmuggle_url(url, {})
    data.update(existing_data)
    smuggled = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '#'.join((url, smuggled))
3521
3522
def unsmuggle_url(smug_url, default=None):
    """Extract data smuggled into a URL by smuggle_url(); returns
    (url, data), or (url, default) when nothing was smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(jsond)
3530
3531
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.00MiB'.

    Accepts None (returns 'N/A'), ints, floats and numeric strings.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    # Clamp the exponent: values >= 1024**9 would otherwise index past
    # the suffix table and raise IndexError
    exponent = min(exponent, len(suffixes) - 1)
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3544
3545
3546 def lookup_unit_table(unit_table, s):
3547 units_re = '|'.join(re.escape(u) for u in unit_table)
3548 m = re.match(
3549 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
3550 if not m:
3551 return None
3552 num_str = m.group('num').replace(',', '.')
3553 mult = unit_table[m.group('unit')]
3554 return int(float(num_str) * mult)
3555
3556
3557 def parse_filesize(s):
3558 if s is None:
3559 return None
3560
3561 # The lower-case forms are of course incorrect and unofficial,
3562 # but we support those too
3563 _UNIT_TABLE = {
3564 'B': 1,
3565 'b': 1,
3566 'bytes': 1,
3567 'KiB': 1024,
3568 'KB': 1000,
3569 'kB': 1024,
3570 'Kb': 1000,
3571 'kb': 1000,
3572 'kilobytes': 1000,
3573 'kibibytes': 1024,
3574 'MiB': 1024 ** 2,
3575 'MB': 1000 ** 2,
3576 'mB': 1024 ** 2,
3577 'Mb': 1000 ** 2,
3578 'mb': 1000 ** 2,
3579 'megabytes': 1000 ** 2,
3580 'mebibytes': 1024 ** 2,
3581 'GiB': 1024 ** 3,
3582 'GB': 1000 ** 3,
3583 'gB': 1024 ** 3,
3584 'Gb': 1000 ** 3,
3585 'gb': 1000 ** 3,
3586 'gigabytes': 1000 ** 3,
3587 'gibibytes': 1024 ** 3,
3588 'TiB': 1024 ** 4,
3589 'TB': 1000 ** 4,
3590 'tB': 1024 ** 4,
3591 'Tb': 1000 ** 4,
3592 'tb': 1000 ** 4,
3593 'terabytes': 1000 ** 4,
3594 'tebibytes': 1024 ** 4,
3595 'PiB': 1024 ** 5,
3596 'PB': 1000 ** 5,
3597 'pB': 1024 ** 5,
3598 'Pb': 1000 ** 5,
3599 'pb': 1000 ** 5,
3600 'petabytes': 1000 ** 5,
3601 'pebibytes': 1024 ** 5,
3602 'EiB': 1024 ** 6,
3603 'EB': 1000 ** 6,
3604 'eB': 1024 ** 6,
3605 'Eb': 1000 ** 6,
3606 'eb': 1000 ** 6,
3607 'exabytes': 1000 ** 6,
3608 'exbibytes': 1024 ** 6,
3609 'ZiB': 1024 ** 7,
3610 'ZB': 1000 ** 7,
3611 'zB': 1024 ** 7,
3612 'Zb': 1000 ** 7,
3613 'zb': 1000 ** 7,
3614 'zettabytes': 1000 ** 7,
3615 'zebibytes': 1024 ** 7,
3616 'YiB': 1024 ** 8,
3617 'YB': 1000 ** 8,
3618 'yB': 1024 ** 8,
3619 'Yb': 1000 ** 8,
3620 'yb': 1000 ** 8,
3621 'yottabytes': 1000 ** 8,
3622 'yobibytes': 1024 ** 8,
3623 }
3624
3625 return lookup_unit_table(_UNIT_TABLE, s)
3626
3627
3628 def parse_count(s):
3629 if s is None:
3630 return None
3631
3632 s = s.strip()
3633
3634 if re.match(r'^[\d,.]+$', s):
3635 return str_to_int(s)
3636
3637 _UNIT_TABLE = {
3638 'k': 1000,
3639 'K': 1000,
3640 'm': 1000 ** 2,
3641 'M': 1000 ** 2,
3642 'kk': 1000 ** 2,
3643 'KK': 1000 ** 2,
3644 }
3645
3646 return lookup_unit_table(_UNIT_TABLE, s)
3647
3648
3649 def parse_resolution(s):
3650 if s is None:
3651 return {}
3652
3653 mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3654 if mobj:
3655 return {
3656 'width': int(mobj.group('w')),
3657 'height': int(mobj.group('h')),
3658 }
3659
3660 mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3661 if mobj:
3662 return {'height': int(mobj.group(1))}
3663
3664 mobj = re.search(r'\b([48])[kK]\b', s)
3665 if mobj:
3666 return {'height': int(mobj.group(1)) * 540}
3667
3668 return {}
3669
3670
3671 def parse_bitrate(s):
3672 if not isinstance(s, compat_str):
3673 return
3674 mobj = re.search(r'\b(\d+)\s*kbps', s)
3675 if mobj:
3676 return int(mobj.group(1))
3677
3678
3679 def month_by_name(name, lang='en'):
3680 """ Return the number of a month by (locale-independently) English name """
3681
3682 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
3683
3684 try:
3685 return month_names.index(name) + 1
3686 except ValueError:
3687 return None
3688
3689
3690 def month_by_abbreviation(abbrev):
3691 """ Return the number of a month by (locale-independently) English
3692 abbreviations """
3693
3694 try:
3695 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
3696 except ValueError:
3697 return None
3698
3699
3700 def fix_xml_ampersands(xml_str):
3701 """Replace all the '&' by '&amp;' in XML"""
3702 return re.sub(
3703 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3704 '&amp;',
3705 xml_str)
3706
3707
3708 def setproctitle(title):
3709 assert isinstance(title, compat_str)
3710
3711 # ctypes in Jython is not complete
3712 # http://bugs.jython.org/issue2148
3713 if sys.platform.startswith('java'):
3714 return
3715
3716 try:
3717 libc = ctypes.cdll.LoadLibrary('libc.so.6')
3718 except OSError:
3719 return
3720 except TypeError:
3721 # LoadLibrary in Windows Python 2.7.13 only expects
3722 # a bytestring, but since unicode_literals turns
3723 # every string into a unicode string, it fails.
3724 return
3725 title_bytes = title.encode('utf-8')
3726 buf = ctypes.create_string_buffer(len(title_bytes))
3727 buf.value = title_bytes
3728 try:
3729 libc.prctl(15, buf, 0, 0, 0)
3730 except AttributeError:
3731 return # Strange libc, just skip this
3732
3733
3734 def remove_start(s, start):
3735 return s[len(start):] if s is not None and s.startswith(start) else s
3736
3737
3738 def remove_end(s, end):
3739 return s[:-len(end)] if s is not None and s.endswith(end) else s
3740
3741
3742 def remove_quotes(s):
3743 if s is None or len(s) < 2:
3744 return s
3745 for quote in ('"', "'", ):
3746 if s[0] == quote and s[-1] == quote:
3747 return s[1:-1]
3748 return s
3749
3750
3751 def get_domain(url):
3752 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3753 return domain.group('domain') if domain else None
3754
3755
3756 def url_basename(url):
3757 path = compat_urlparse.urlparse(url).path
3758 return path.strip('/').split('/')[-1]
3759
3760
3761 def base_url(url):
3762 return re.match(r'https?://[^?#&]+/', url).group()
3763
3764
3765 def urljoin(base, path):
3766 if isinstance(path, bytes):
3767 path = path.decode('utf-8')
3768 if not isinstance(path, compat_str) or not path:
3769 return None
3770 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
3771 return path
3772 if isinstance(base, bytes):
3773 base = base.decode('utf-8')
3774 if not isinstance(base, compat_str) or not re.match(
3775 r'^(?:https?:)?//', base):
3776 return None
3777 return compat_urlparse.urljoin(base, path)
3778
3779
3780 class HEADRequest(compat_urllib_request.Request):
3781 def get_method(self):
3782 return 'HEAD'
3783
3784
3785 class PUTRequest(compat_urllib_request.Request):
3786 def get_method(self):
3787 return 'PUT'
3788
3789
3790 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
3791 if get_attr:
3792 if v is not None:
3793 v = getattr(v, get_attr, None)
3794 if v == '':
3795 v = None
3796 if v is None:
3797 return default
3798 try:
3799 return int(v) * invscale // scale
3800 except (ValueError, TypeError):
3801 return default
3802
3803
3804 def str_or_none(v, default=None):
3805 return default if v is None else compat_str(v)
3806
3807
3808 def str_to_int(int_str):
3809 """ A more relaxed version of int_or_none """
3810 if isinstance(int_str, compat_integer_types):
3811 return int_str
3812 elif isinstance(int_str, compat_str):
3813 int_str = re.sub(r'[,\.\+]', '', int_str)
3814 return int_or_none(int_str)
3815
3816
3817 def float_or_none(v, scale=1, invscale=1, default=None):
3818 if v is None:
3819 return default
3820 try:
3821 return float(v) * invscale / scale
3822 except (ValueError, TypeError):
3823 return default
3824
3825
3826 def bool_or_none(v, default=None):
3827 return v if isinstance(v, bool) else default
3828
3829
3830 def strip_or_none(v, default=None):
3831 return v.strip() if isinstance(v, compat_str) else default
3832
3833
3834 def url_or_none(url):
3835 if not url or not isinstance(url, compat_str):
3836 return None
3837 url = url.strip()
3838 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
3839
3840
3841 def strftime_or_none(timestamp, date_format, default=None):
3842 datetime_object = None
3843 try:
3844 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3845 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3846 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3847 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3848 return datetime_object.strftime(date_format)
3849 except (ValueError, TypeError, AttributeError):
3850 return default
3851
3852
3853 def parse_duration(s):
3854 if not isinstance(s, compat_basestring):
3855 return None
3856
3857 s = s.strip()
3858
3859 days, hours, mins, secs, ms = [None] * 5
3860 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
3861 if m:
3862 days, hours, mins, secs, ms = m.groups()
3863 else:
3864 m = re.match(
3865 r'''(?ix)(?:P?
3866 (?:
3867 [0-9]+\s*y(?:ears?)?\s*
3868 )?
3869 (?:
3870 [0-9]+\s*m(?:onths?)?\s*
3871 )?
3872 (?:
3873 [0-9]+\s*w(?:eeks?)?\s*
3874 )?
3875 (?:
3876 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3877 )?
3878 T)?
3879 (?:
3880 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3881 )?
3882 (?:
3883 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3884 )?
3885 (?:
3886 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3887 )?Z?$''', s)
3888 if m:
3889 days, hours, mins, secs, ms = m.groups()
3890 else:
3891 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
3892 if m:
3893 hours, mins = m.groups()
3894 else:
3895 return None
3896
3897 duration = 0
3898 if secs:
3899 duration += float(secs)
3900 if mins:
3901 duration += float(mins) * 60
3902 if hours:
3903 duration += float(hours) * 60 * 60
3904 if days:
3905 duration += float(days) * 24 * 60 * 60
3906 if ms:
3907 duration += float(ms)
3908 return duration
3909
3910
3911 def prepend_extension(filename, ext, expected_real_ext=None):
3912 name, real_ext = os.path.splitext(filename)
3913 return (
3914 '{0}.{1}{2}'.format(name, ext, real_ext)
3915 if not expected_real_ext or real_ext[1:] == expected_real_ext
3916 else '{0}.{1}'.format(filename, ext))
3917
3918
3919 def replace_extension(filename, ext, expected_real_ext=None):
3920 name, real_ext = os.path.splitext(filename)
3921 return '{0}.{1}'.format(
3922 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3923 ext)
3924
3925
3926 def check_executable(exe, args=[]):
3927 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3928 args can be a list of arguments for a short output (like -version) """
3929 try:
3930 process_communicate_or_kill(subprocess.Popen(
3931 [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
3932 except OSError:
3933 return False
3934 return exe
3935
3936
3937 def get_exe_version(exe, args=['--version'],
3938 version_re=None, unrecognized='present'):
3939 """ Returns the version of the specified executable,
3940 or False if the executable is not present """
3941 try:
3942 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3943 # SIGTTOU if yt-dlp is run in the background.
3944 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3945 out, _ = process_communicate_or_kill(subprocess.Popen(
3946 [encodeArgument(exe)] + args,
3947 stdin=subprocess.PIPE,
3948 stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
3949 except OSError:
3950 return False
3951 if isinstance(out, bytes): # Python 2.x
3952 out = out.decode('ascii', 'ignore')
3953 return detect_exe_version(out, version_re, unrecognized)
3954
3955
3956 def detect_exe_version(output, version_re=None, unrecognized='present'):
3957 assert isinstance(output, compat_str)
3958 if version_re is None:
3959 version_re = r'version\s+([-0-9._a-zA-Z]+)'
3960 m = re.search(version_re, output)
3961 if m:
3962 return m.group(1)
3963 else:
3964 return unrecognized
3965
3966
3967 class LazyList(collections.abc.Sequence):
3968 ''' Lazy immutable list from an iterable
3969 Note that slices of a LazyList are lists and not LazyList'''
3970
3971 def __init__(self, iterable):
3972 self.__iterable = iter(iterable)
3973 self.__cache = []
3974 self.__reversed = False
3975
3976 def __iter__(self):
3977 if self.__reversed:
3978 # We need to consume the entire iterable to iterate in reverse
3979 yield from self.exhaust()
3980 return
3981 yield from self.__cache
3982 for item in self.__iterable:
3983 self.__cache.append(item)
3984 yield item
3985
3986 def __exhaust(self):
3987 self.__cache.extend(self.__iterable)
3988 return self.__cache
3989
3990 def exhaust(self):
3991 ''' Evaluate the entire iterable '''
3992 return self.__exhaust()[::-1 if self.__reversed else 1]
3993
3994 @staticmethod
3995 def __reverse_index(x):
3996 return None if x is None else -(x + 1)
3997
3998 def __getitem__(self, idx):
3999 if isinstance(idx, slice):
4000 if self.__reversed:
4001 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4002 start, stop, step = idx.start, idx.stop, idx.step or 1
4003 elif isinstance(idx, int):
4004 if self.__reversed:
4005 idx = self.__reverse_index(idx)
4006 start, stop, step = idx, idx, 0
4007 else:
4008 raise TypeError('indices must be integers or slices')
4009 if ((start or 0) < 0 or (stop or 0) < 0
4010 or (start is None and step < 0)
4011 or (stop is None and step > 0)):
4012 # We need to consume the entire iterable to be able to slice from the end
4013 # Obviously, never use this with infinite iterables
4014 return self.__exhaust()[idx]
4015
4016 n = max(start or 0, stop or 0) - len(self.__cache) + 1
4017 if n > 0:
4018 self.__cache.extend(itertools.islice(self.__iterable, n))
4019 return self.__cache[idx]
4020
4021 def __bool__(self):
4022 try:
4023 self[-1] if self.__reversed else self[0]
4024 except IndexError:
4025 return False
4026 return True
4027
4028 def __len__(self):
4029 self.exhaust()
4030 return len(self.__cache)
4031
4032 def reverse(self):
4033 self.__reversed = not self.__reversed
4034 return self
4035
4036 def __repr__(self):
4037 # repr and str should mimic a list. So we exhaust the iterable
4038 return repr(self.exhaust())
4039
4040 def __str__(self):
4041 return repr(self.exhaust())
4042
4043
4044 class PagedList:
4045 def __len__(self):
4046 # This is only useful for tests
4047 return len(self.getslice())
4048
4049 def __init__(self, pagefunc, pagesize, use_cache=True):
4050 self._pagefunc = pagefunc
4051 self._pagesize = pagesize
4052 self._use_cache = use_cache
4053 self._cache = {}
4054
4055 def getpage(self, pagenum):
4056 page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
4057 if self._use_cache:
4058 self._cache[pagenum] = page_results
4059 return page_results
4060
4061 def getslice(self, start=0, end=None):
4062 return list(self._getslice(start, end))
4063
4064 def _getslice(self, start, end):
4065 raise NotImplementedError('This method must be implemented by subclasses')
4066
4067 def __getitem__(self, idx):
4068 # NOTE: cache must be enabled if this is used
4069 if not isinstance(idx, int) or idx < 0:
4070 raise TypeError('indices must be non-negative integers')
4071 entries = self.getslice(idx, idx + 1)
4072 return entries[0] if entries else None
4073
4074
4075 class OnDemandPagedList(PagedList):
4076 def _getslice(self, start, end):
4077 for pagenum in itertools.count(start // self._pagesize):
4078 firstid = pagenum * self._pagesize
4079 nextfirstid = pagenum * self._pagesize + self._pagesize
4080 if start >= nextfirstid:
4081 continue
4082
4083 startv = (
4084 start % self._pagesize
4085 if firstid <= start < nextfirstid
4086 else 0)
4087 endv = (
4088 ((end - 1) % self._pagesize) + 1
4089 if (end is not None and firstid <= end <= nextfirstid)
4090 else None)
4091
4092 page_results = self.getpage(pagenum)
4093 if startv != 0 or endv is not None:
4094 page_results = page_results[startv:endv]
4095 yield from page_results
4096
4097 # A little optimization - if current page is not "full", ie. does
4098 # not contain page_size videos then we can assume that this page
4099 # is the last one - there are no more ids on further pages -
4100 # i.e. no need to query again.
4101 if len(page_results) + startv < self._pagesize:
4102 break
4103
4104 # If we got the whole page, but the next page is not interesting,
4105 # break out early as well
4106 if end == nextfirstid:
4107 break
4108
4109
4110 class InAdvancePagedList(PagedList):
4111 def __init__(self, pagefunc, pagecount, pagesize):
4112 self._pagecount = pagecount
4113 PagedList.__init__(self, pagefunc, pagesize, True)
4114
4115 def _getslice(self, start, end):
4116 start_page = start // self._pagesize
4117 end_page = (
4118 self._pagecount if end is None else (end // self._pagesize + 1))
4119 skip_elems = start - start_page * self._pagesize
4120 only_more = None if end is None else end - start
4121 for pagenum in range(start_page, end_page):
4122 page_results = self.getpage(pagenum)
4123 if skip_elems:
4124 page_results = page_results[skip_elems:]
4125 skip_elems = None
4126 if only_more is not None:
4127 if len(page_results) < only_more:
4128 only_more -= len(page_results)
4129 else:
4130 yield from page_results[:only_more]
4131 break
4132 yield from page_results
4133
4134
4135 def uppercase_escape(s):
4136 unicode_escape = codecs.getdecoder('unicode_escape')
4137 return re.sub(
4138 r'\\U[0-9a-fA-F]{8}',
4139 lambda m: unicode_escape(m.group(0))[0],
4140 s)
4141
4142
4143 def lowercase_escape(s):
4144 unicode_escape = codecs.getdecoder('unicode_escape')
4145 return re.sub(
4146 r'\\u[0-9a-fA-F]{4}',
4147 lambda m: unicode_escape(m.group(0))[0],
4148 s)
4149
4150
4151 def escape_rfc3986(s):
4152 """Escape non-ASCII characters as suggested by RFC 3986"""
4153 if sys.version_info < (3, 0) and isinstance(s, compat_str):
4154 s = s.encode('utf-8')
4155 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4156
4157
4158 def escape_url(url):
4159 """Escape URL as suggested by RFC 3986"""
4160 url_parsed = compat_urllib_parse_urlparse(url)
4161 return url_parsed._replace(
4162 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
4163 path=escape_rfc3986(url_parsed.path),
4164 params=escape_rfc3986(url_parsed.params),
4165 query=escape_rfc3986(url_parsed.query),
4166 fragment=escape_rfc3986(url_parsed.fragment)
4167 ).geturl()
4168
4169
4170 def read_batch_urls(batch_fd):
4171 def fixup(url):
4172 if not isinstance(url, compat_str):
4173 url = url.decode('utf-8', 'replace')
4174 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4175 for bom in BOM_UTF8:
4176 if url.startswith(bom):
4177 url = url[len(bom):]
4178 url = url.lstrip()
4179 if not url or url.startswith(('#', ';', ']')):
4180 return False
4181 # "#" cannot be stripped out since it is part of the URI
4182 # However, it can be safely stipped out if follwing a whitespace
4183 return re.split(r'\s#', url, 1)[0].rstrip()
4184
4185 with contextlib.closing(batch_fd) as fd:
4186 return [url for url in map(fixup, fd) if url]
4187
4188
4189 def urlencode_postdata(*args, **kargs):
4190 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4191
4192
4193 def update_url_query(url, query):
4194 if not query:
4195 return url
4196 parsed_url = compat_urlparse.urlparse(url)
4197 qs = compat_parse_qs(parsed_url.query)
4198 qs.update(query)
4199 return compat_urlparse.urlunparse(parsed_url._replace(
4200 query=compat_urllib_parse_urlencode(qs, True)))
4201
4202
4203 def update_Request(req, url=None, data=None, headers={}, query={}):
4204 req_headers = req.headers.copy()
4205 req_headers.update(headers)
4206 req_data = data or req.data
4207 req_url = update_url_query(url or req.get_full_url(), query)
4208 req_get_method = req.get_method()
4209 if req_get_method == 'HEAD':
4210 req_type = HEADRequest
4211 elif req_get_method == 'PUT':
4212 req_type = PUTRequest
4213 else:
4214 req_type = compat_urllib_request.Request
4215 new_req = req_type(
4216 req_url, data=req_data, headers=req_headers,
4217 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4218 if hasattr(req, 'timeout'):
4219 new_req.timeout = req.timeout
4220 return new_req
4221
4222
4223 def _multipart_encode_impl(data, boundary):
4224 content_type = 'multipart/form-data; boundary=%s' % boundary
4225
4226 out = b''
4227 for k, v in data.items():
4228 out += b'--' + boundary.encode('ascii') + b'\r\n'
4229 if isinstance(k, compat_str):
4230 k = k.encode('utf-8')
4231 if isinstance(v, compat_str):
4232 v = v.encode('utf-8')
4233 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4234 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
4235 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
4236 if boundary.encode('ascii') in content:
4237 raise ValueError('Boundary overlaps with data')
4238 out += content
4239
4240 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4241
4242 return out, content_type
4243
4244
4245 def multipart_encode(data, boundary=None):
4246 '''
4247 Encode a dict to RFC 7578-compliant form-data
4248
4249 data:
4250 A dict where keys and values can be either Unicode or bytes-like
4251 objects.
4252 boundary:
4253 If specified a Unicode object, it's used as the boundary. Otherwise
4254 a random boundary is generated.
4255
4256 Reference: https://tools.ietf.org/html/rfc7578
4257 '''
4258 has_specified_boundary = boundary is not None
4259
4260 while True:
4261 if boundary is None:
4262 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4263
4264 try:
4265 out, content_type = _multipart_encode_impl(data, boundary)
4266 break
4267 except ValueError:
4268 if has_specified_boundary:
4269 raise
4270 boundary = None
4271
4272 return out, content_type
4273
4274
4275 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
4276 if isinstance(key_or_keys, (list, tuple)):
4277 for key in key_or_keys:
4278 if key not in d or d[key] is None or skip_false_values and not d[key]:
4279 continue
4280 return d[key]
4281 return default
4282 return d.get(key_or_keys, default)
4283
4284
4285 def try_get(src, getter, expected_type=None):
4286 for get in variadic(getter):
4287 try:
4288 v = get(src)
4289 except (AttributeError, KeyError, TypeError, IndexError):
4290 pass
4291 else:
4292 if expected_type is None or isinstance(v, expected_type):
4293 return v
4294
4295
4296 def merge_dicts(*dicts):
4297 merged = {}
4298 for a_dict in dicts:
4299 for k, v in a_dict.items():
4300 if v is None:
4301 continue
4302 if (k not in merged
4303 or (isinstance(v, compat_str) and v
4304 and isinstance(merged[k], compat_str)
4305 and not merged[k])):
4306 merged[k] = v
4307 return merged
4308
4309
4310 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4311 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4312
4313
4314 US_RATINGS = {
4315 'G': 0,
4316 'PG': 10,
4317 'PG-13': 13,
4318 'R': 16,
4319 'NC': 18,
4320 }
4321
4322
4323 TV_PARENTAL_GUIDELINES = {
4324 'TV-Y': 0,
4325 'TV-Y7': 7,
4326 'TV-G': 0,
4327 'TV-PG': 0,
4328 'TV-14': 14,
4329 'TV-MA': 17,
4330 }
4331
4332
4333 def parse_age_limit(s):
4334 if type(s) == int:
4335 return s if 0 <= s <= 21 else None
4336 if not isinstance(s, compat_basestring):
4337 return None
4338 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
4339 if m:
4340 return int(m.group('age'))
4341 s = s.upper()
4342 if s in US_RATINGS:
4343 return US_RATINGS[s]
4344 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
4345 if m:
4346 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
4347 return None
4348
4349
4350 def strip_jsonp(code):
4351 return re.sub(
4352 r'''(?sx)^
4353 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4354 (?:\s*&&\s*(?P=func_name))?
4355 \s*\(\s*(?P<callback_data>.*)\);?
4356 \s*?(?://[^\n]*)*$''',
4357 r'\g<callback_data>', code)
4358
4359
4360 def js_to_json(code, vars={}):
4361 # vars is a dict of var, val pairs to substitute
4362 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4363 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4364 INTEGER_TABLE = (
4365 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4366 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4367 )
4368
4369 def fix_kv(m):
4370 v = m.group(0)
4371 if v in ('true', 'false', 'null'):
4372 return v
4373 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
4374 return ""
4375
4376 if v[0] in ("'", '"'):
4377 v = re.sub(r'(?s)\\.|"', lambda m: {
4378 '"': '\\"',
4379 "\\'": "'",
4380 '\\\n': '',
4381 '\\x': '\\u00',
4382 }.get(m.group(0), m.group(0)), v[1:-1])
4383 else:
4384 for regex, base in INTEGER_TABLE:
4385 im = re.match(regex, v)
4386 if im:
4387 i = int(im.group(1), base)
4388 return '"%d":' % i if v.endswith(':') else '%d' % i
4389
4390 if v in vars:
4391 return vars[v]
4392
4393 return '"%s"' % v
4394
4395 return re.sub(r'''(?sx)
4396 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4397 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4398 {comment}|,(?={skip}[\]}}])|
4399 (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4400 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4401 [0-9]+(?={skip}:)|
4402 !+
4403 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4404
4405
4406 def qualities(quality_ids):
4407 """ Get a numeric quality value out of a list of possible values """
4408 def q(qid):
4409 try:
4410 return quality_ids.index(qid)
4411 except ValueError:
4412 return -1
4413 return q
4414
4415
4416 DEFAULT_OUTTMPL = {
4417 'default': '%(title)s [%(id)s].%(ext)s',
4418 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
4419 }
4420 OUTTMPL_TYPES = {
4421 'chapter': None,
4422 'subtitle': None,
4423 'thumbnail': None,
4424 'description': 'description',
4425 'annotation': 'annotations.xml',
4426 'infojson': 'info.json',
4427 'pl_thumbnail': None,
4428 'pl_description': 'description',
4429 'pl_infojson': 'info.json',
4430 }
4431
4432 # As of [1] format syntax is:
4433 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4434 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4435 STR_FORMAT_RE_TMPL = r'''(?x)
4436 (?<!%)(?P<prefix>(?:%%)*)
4437 %
4438 (?P<has_key>\((?P<key>{0})\))? # mapping key
4439 (?P<format>
4440 (?:[#0\-+ ]+)? # conversion flags (optional)
4441 (?:\d+)? # minimum field width (optional)
4442 (?:\.\d+)? # precision (optional)
4443 [hlL]? # length modifier (optional)
4444 {1} # conversion type
4445 )
4446 '''
4447
4448
4449 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4450
4451
4452 def limit_length(s, length):
4453 """ Add ellipses to overly long strings """
4454 if s is None:
4455 return None
4456 ELLIPSES = '...'
4457 if len(s) > length:
4458 return s[:length - len(ELLIPSES)] + ELLIPSES
4459 return s
4460
4461
4462 def version_tuple(v):
4463 return tuple(int(e) for e in re.split(r'[-.]', v))
4464
4465
4466 def is_outdated_version(version, limit, assume_new=True):
4467 if not version:
4468 return not assume_new
4469 try:
4470 return version_tuple(version) < version_tuple(limit)
4471 except ValueError:
4472 return not assume_new
4473
4474
4475 def ytdl_is_updateable():
4476 """ Returns if yt-dlp can be updated with -U """
4477 return False
4478
4479 from zipimport import zipimporter
4480
4481 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4482
4483
4484 def args_to_str(args):
4485 # Get a short string representation for a subprocess command
4486 return ' '.join(compat_shlex_quote(a) for a in args)
4487
4488
4489 def error_to_compat_str(err):
4490 err_str = str(err)
4491 # On python 2 error byte string must be decoded with proper
4492 # encoding rather than ascii
4493 if sys.version_info[0] < 3:
4494 err_str = err_str.decode(preferredencoding())
4495 return err_str
4496
4497
4498 def mimetype2ext(mt):
4499 if mt is None:
4500 return None
4501
4502 ext = {
4503 'audio/mp4': 'm4a',
4504 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4505 # it's the most popular one
4506 'audio/mpeg': 'mp3',
4507 'audio/x-wav': 'wav',
4508 }.get(mt)
4509 if ext is not None:
4510 return ext
4511
4512 _, _, res = mt.rpartition('/')
4513 res = res.split(';')[0].strip().lower()
4514
4515 return {
4516 '3gpp': '3gp',
4517 'smptett+xml': 'tt',
4518 'ttaf+xml': 'dfxp',
4519 'ttml+xml': 'ttml',
4520 'x-flv': 'flv',
4521 'x-mp4-fragmented': 'mp4',
4522 'x-ms-sami': 'sami',
4523 'x-ms-wmv': 'wmv',
4524 'mpegurl': 'm3u8',
4525 'x-mpegurl': 'm3u8',
4526 'vnd.apple.mpegurl': 'm3u8',
4527 'dash+xml': 'mpd',
4528 'f4m+xml': 'f4m',
4529 'hds+xml': 'f4m',
4530 'vnd.ms-sstr+xml': 'ism',
4531 'quicktime': 'mov',
4532 'mp2t': 'ts',
4533 'x-wav': 'wav',
4534 }.get(res, res)
4535
4536
4537 def parse_codecs(codecs_str):
4538 # http://tools.ietf.org/html/rfc6381
4539 if not codecs_str:
4540 return {}
4541 split_codecs = list(filter(None, map(
4542 str.strip, codecs_str.strip().strip(',').split(','))))
4543 vcodec, acodec = None, None
4544 for full_codec in split_codecs:
4545 codec = full_codec.split('.')[0]
4546 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4547 if not vcodec:
4548 vcodec = full_codec
4549 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4550 if not acodec:
4551 acodec = full_codec
4552 else:
4553 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4554 if not vcodec and not acodec:
4555 if len(split_codecs) == 2:
4556 return {
4557 'vcodec': split_codecs[0],
4558 'acodec': split_codecs[1],
4559 }
4560 else:
4561 return {
4562 'vcodec': vcodec or 'none',
4563 'acodec': acodec or 'none',
4564 }
4565 return {}
4566
4567
4568 def urlhandle_detect_ext(url_handle):
4569 getheader = url_handle.headers.get
4570
4571 cd = getheader('Content-Disposition')
4572 if cd:
4573 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4574 if m:
4575 e = determine_ext(m.group('filename'), default_ext=None)
4576 if e:
4577 return e
4578
4579 return mimetype2ext(getheader('Content-Type'))
4580
4581
4582 def encode_data_uri(data, mime_type):
4583 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4584
4585
4586 def age_restricted(content_limit, age_limit):
4587 """ Returns True iff the content should be blocked """
4588
4589 if age_limit is None: # No limit set
4590 return False
4591 if content_limit is None:
4592 return False # Content available for everyone
4593 return age_limit < content_limit
4594
4595
4596 def is_html(first_bytes):
4597 """ Detect whether a file contains HTML by examining its first bytes. """
4598
4599 BOMS = [
4600 (b'\xef\xbb\xbf', 'utf-8'),
4601 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4602 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4603 (b'\xff\xfe', 'utf-16-le'),
4604 (b'\xfe\xff', 'utf-16-be'),
4605 ]
4606 for bom, enc in BOMS:
4607 if first_bytes.startswith(bom):
4608 s = first_bytes[len(bom):].decode(enc, 'replace')
4609 break
4610 else:
4611 s = first_bytes.decode('utf-8', 'replace')
4612
4613 return re.match(r'^\s*<', s)
4614
4615
4616 def determine_protocol(info_dict):
4617 protocol = info_dict.get('protocol')
4618 if protocol is not None:
4619 return protocol
4620
4621 url = info_dict['url']
4622 if url.startswith('rtmp'):
4623 return 'rtmp'
4624 elif url.startswith('mms'):
4625 return 'mms'
4626 elif url.startswith('rtsp'):
4627 return 'rtsp'
4628
4629 ext = determine_ext(url)
4630 if ext == 'm3u8':
4631 return 'm3u8'
4632 elif ext == 'f4m':
4633 return 'f4m'
4634
4635 return compat_urllib_parse_urlparse(url).scheme
4636
4637
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def column_widths(rows):
        # Widest stringified cell of each column
        return [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]

    def keep_marked(row, mask):
        return [cell for keep, cell in zip(mask, row) if keep]

    if hideEmpty:
        # A column whose every data cell stringifies to '' has width 0 (falsy)
        # and is dropped from both the header and the data rows
        mask = column_widths(data)
        header_row = keep_marked(header_row, mask)
        data = [keep_marked(row, mask) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    if delim:
        # Insert a dashed separator line between the header and the data
        table = [header_row, ['-' * width for width in widths]] + data
    # Left-pad every column except the last, which is left ragged
    format_str = ' '.join('%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
4658
4659
def _match_one(filter_part, dct):
    """Evaluate one filter expression (e.g. 'duration > 60', 'title *= foo',
    '!is_live') against the dict *dct*.

    Returns a truthy value when the filter passes and raises ValueError for
    syntactically invalid filter parts or misused operators.
    """
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    # Binary form: <key> [!]<op>[?] <value>, where '?' makes a missing key pass
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        unnegated_op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quote characters escaped inside the quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            if m.group('op') in STRING_OPERATORS:
                raise ValueError('Operator %s only supports string values!' % m.group('op'))
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer; try to interpret it as a filesize
                # (e.g. '500k'), with and without an explicit 'B' suffix
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing key passes only when the '?' (none_inclusive) flag was used
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: presence ('key') / absence ('!key') check;
    # booleans are tested for their value rather than mere presence
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4739
4740
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # Split on unescaped '&' only; each part must pass for the whole to pass
    for filter_part in re.split(r'(?<!\\)&', filter_str):
        if not _match_one(filter_part.replace(r'\&', '&'), dct):
            return False
    return True
4747
4748
def match_filter_func(filter_str):
    """Build a match-filter callable from *filter_str*.

    The returned function takes an info dict and returns None when the video
    passes the filter, or a human-readable skip message otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4757
4758
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float), or None."""
    if not time_expr:
        return None

    # Plain offset, optionally suffixed with 's' (e.g. '10.5s')
    match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if match:
        return float(match.group('time_offset'))

    # Clock time H:MM:SS[.f] or H:MM:SS:f (frame-style separator treated as '.')
    match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if match:
        hours, minutes, rest = match.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(rest.replace(':', '.'))
4770
4771
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode: HH:MM:SS,mmm."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4774
4775
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitle data to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no subtitle paragraphs
    '''
    # Older TTML/TTAF namespaces are rewritten to the current ones so a single
    # set of XPath expressions works for all document generations
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # The only TTML styling attributes that can be expressed in SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that renders one <p> element as SRT-flavoured text

        def __init__(self):
            # These used to be *class* attributes; the two mutable lists were
            # then shared between every parser instance, so state left behind
            # by one paragraph (e.g. after a parse error) leaked into the
            # next. Instance attributes keep each paragraph independent.
            self._out = ''
            self._unclosed_elements = []
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the parent
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the node and run it through a fresh per-paragraph parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> elements; keep looping until styles whose parent style
    # appears later in the document have been resolved too
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style referenced from <body> or <div> becomes the document default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4938
4939
def cli_option(params, command_option, param):
    """Return [command_option, str(value)] if params[param] is set, else [].

    The previous implementation only stringified *truthy* values, so a value
    like 0 slipped through unconverted and ended up as a non-string entry in
    an argv list; now every non-None value is converted.
    """
    param = params.get(param)
    return [command_option, compat_str(param)] if param is not None else []
4945
4946
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean param as CLI arguments.

    Returns [] when unset; otherwise ['--opt', 'true'/'false'], or a single
    '--opt<sep>value' token when *separator* is given.
    """
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4955
4956
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals *expected_value*, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4960
4961
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Select external-downloader/postprocessor args from *argdict*.

    *keys* is a list of key groups, tried in order; the first group with any
    matching entries wins and its argument lists are flattened and returned.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        # Collect the argument lists registered under any key of this group
        matches = [argdict.get(key.lower()) for key in variadic(key_list)]
        matches = [args for args in matches if args is not None]
        if matches:
            # Flatten the per-key argument lists into one list
            return [arg for args in matches for arg in args]
    return default
4980
4981
class ISO639Utils(object):
    """Conversion between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are considered, so region suffixes
        # (e.g. 'en-US') are tolerated
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear scan; the first matching entry wins for codes with several
        # 639-1 aliases (e.g. 'heb' -> 'he', not the legacy 'iw')
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
5185
5186
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive lookup; returns None for unknown codes
        return cls._country_map.get(code.upper())
5445
5446
class GeoUtils(object):
    """Helpers for faking a geographic location by picking an IP address
    from a per-country CIDR block (used for geo-restriction bypass)."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as a str) from the block mapped to a
        two-letter country code, or from an explicit CIDR block string.
        Returns None for an unknown country code."""
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            # Anything longer than two characters is taken as a CIDR block
            block = code_or_block
        addr, preflen = block.split('/')
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # Randomize only the host bits; the network prefix stays fixed
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5705
5706
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets each request override the configured proxy via
    a 'Ytdl-request-proxy' header, and defers SOCKS proxies to the
    http/https handlers via a 'Ytdl-socks-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers; the keyword defaults bind `type` and `meth`
        # at definition time so each lambda keeps its own scheme
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy overrides the handler-level one; the marker
        # header is internal, so strip it before the request goes out
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers wrap the socket with SOCKS
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5730
5731
5732 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5733 # released into Public Domain
5734 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5735
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # after much testing, this algorithm was deemed to be the fastest
    s = b''
    n = int(n)
    # Emit 32 bits at a time, most significant word first
    while n > 0:
        s = compat_struct_pack('>I', n & 0xffffffff) + s
        n = n >> 32
    # strip off leading zeros
    for i in range(len(s)):
        if s[i] != b'\000'[0]:
            break
    else:
        # only happens when n == 0
        s = b'\000'
        i = 0
    s = s[i:]
    # add back some pad bytes. this could be done more efficiently w.r.t. the
    # de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5764
5765
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes to a multiple of 4 so the input splits
    # cleanly into big-endian 32-bit words
    padding = (4 - len(s) % 4) % 4
    if padding:
        s = b'\000' * padding + s
    acc = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        acc = (acc << 32) + word
    return acc
5781
5782
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The bytes are interpreted little-endian (hence the reversal before
    # hexlify), then raised with textbook modular exponentiation
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
5798
5799
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError when data does not fit the target length
    """
    # PKCS#1 v1.5 needs at least 8 padding octets plus the 3 framing octets
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 section 7.2.1: EM = 0x00 || 0x02 || PS || 0x00 || M, where PS
    # consists of pseudo-randomly generated *nonzero* octets. A zero octet in
    # PS would make the decrypter split the padding early and corrupt the
    # message, so draw from 1..255 (the old code used randint(0, 254), which
    # could emit 0).
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5813
5814
def encode_base_n(num, n, table=None):
    """Render the non-negative integer num in base n, drawing digits from
    table (default: the first n characters of 0-9, a-z, A-Z)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Collect digits least-significant first, then reverse
    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5831
5832
def decode_packed_codes(code):
    """Expand source that was packed with a base-n symbol table
    (matched by PACKED_CODES_RE)."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Map each base-n encoded index to its replacement; an empty symbol
    # means the encoded index itself is kept
    symbol_table = {}
    for index in range(count - 1, -1, -1):
        encoded = encode_base_n(index, base)
        symbol_table[encoded] = symbols[index] or encoded

    return re.sub(
        r'\b(\w+)\b', lambda match: symbol_table[match.group(0)],
        obfuscated_code)
5849
5850
def caesar(s, alphabet, shift):
    """Rotate every character of s that occurs in alphabet by shift
    positions (with wrap-around); all other characters pass through."""
    if shift == 0:
        return s
    size = len(alphabet)
    return ''.join(
        ch if ch not in alphabet else alphabet[(alphabet.index(ch) + shift) % size]
        for ch in s)
5858
5859
def rot47(s):
    """Apply the self-inverse ROT47 substitution: each of the 94 printable
    ASCII characters '!' (33) through '~' (126) is rotated by 47 places;
    any other character is left untouched."""
    return ''.join(
        chr(33 + (ord(ch) - 33 + 47) % 94) if '!' <= ch <= '~' else ch
        for ch in s)
5862
5863
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="quoted,value",...')
    into a dict, stripping surrounding double quotes from quoted values."""
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in pairs}
5871
5872
def urshift(val, n):
    """Unsigned 32-bit right shift: a negative val is first reinterpreted
    as its two's-complement 32-bit value."""
    if val < 0:
        val += 0x100000000
    return val >> n
5875
5876
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """
    Decode a PNG byte string into a (width, height, pixels) tuple.

    pixels is a list of rows; each row is a flat list of single color
    components, 3 bytes per pixel. NOTE(review): the IHDR color type and
    bit depth are never inspected, so only non-interlaced 8-bit RGB data
    decodes correctly — confirm inputs match that.

    Reference: https://www.w3.org/TR/PNG/
    """
    # Everything after the 8-byte PNG signature
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Helpers to read big-endian unsigned ints of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: 4-byte length, 4-byte type, data, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk (verified by the signature check above)
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # All IDAT chunks concatenated form a single zlib stream
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 bytes per pixel; each scanline is prefixed by one filter-type byte
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Look up an already-reconstructed byte by flat (row-major) index
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        # First byte of each scanline selects the filter (PNG spec, section 9)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # "left" is the same channel of the previous pixel (3 bytes back);
            # the first pixel of a row (x <= 2) has no left neighbour
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter to reconstruct the raw byte
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use whichever of left/up/upper-left predicts best
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5982
5983
def write_xattr(path, key, value):
    """
    Set the extended file attribute `key` on the file at `path` to `value`
    (bytes), trying in order: the pyxattr/xattr Python modules, NTFS
    Alternate Data Streams (Windows), and the setfattr/xattr CLI tools.

    Raises XAttrMetadataError when writing the attribute fails, and
    XAttrUnavailableError when no usable implementation can be found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Neither pyxattr nor xattr is importable: shell out to CLI tools
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a string argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6066
6067
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the (stringified)
    year, month and day of a random date between 1950-01-01 and 1995-12-31."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    birthday = first + datetime.timedelta(random.randint(0, (last - first).days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
6078
6079
# Templates for internet shortcut files, which are plain text files.
# Each is rendered with %-formatting against a dict providing 'url'
# (and, for the .desktop template, 'filename').

# Windows-style .url shortcut ([InternetShortcut] INI section)
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS .webloc shortcut (Apple XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org .desktop entry (Linux desktop shortcut)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6105
6106
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.

    # Keep an explicitly given port unless it is the default port of the
    # scheme. The previous `port != 80` test wrongly stripped ':80' from
    # non-http schemes (e.g. https://host:80) while keeping a redundant
    # ':443' on https URLs.
    if iri_parts.port is not None and iri_parts.port != {'http': 80, 'https': 443}.get(iri_parts.scheme):
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6149
6150
def to_high_limit_path(path):
    r"""On Windows, return the absolute form of path prefixed with \\?\
    to sidestep the MAX_PATH limit (individual path segments may still be
    length-limited); on other platforms return path unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    return '\\\\?\\' + os.path.abspath(path)
6157
6158
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch a value (obj itself, or obj[field]) and render it through
    template; values in `ignore` (before or after applying func) yield
    `default` instead."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    # Re-check after func: a transformed value may have become ignorable
    if val in ignore:
        return default
    return template % val
6167
6168
def clean_podcast_url(url):
    """Strip known podcast download-analytics / tracking redirect prefixes
    (Chartable, Blubrry, Podtrac, Acast, Podcorn, Podsights) from url."""
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
6184
6185
# Digits used when generating random hex characters
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random UUIDv4-shaped string. NOTE: the 'y' position is
    drawn from all 16 hex digits, not just 8/9/a/b as RFC 4122 prescribes
    for the variant nibble -- kept for compatibility."""
    return ''.join(
        random.choice(_HEX_TABLE) if ch in 'xy' else ch
        for ch in 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6191
6192
6193 def make_dir(path, to_screen=None):
6194 try:
6195 dn = os.path.dirname(path)
6196 if dn and not os.path.exists(dn):
6197 os.makedirs(dn)
6198 return True
6199 except (OSError, IOError) as err:
6200 if callable(to_screen) is not None:
6201 to_screen('unable to create directory ' + error_to_compat_str(err))
6202 return False
6203
6204
def get_executable_path():
    """Return the directory the program is effectively running from:
    the binary's directory under PyInstaller, two levels up when imported
    from a zip, otherwise one level up from this module."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
6214
6215
def load_plugins(name, suffix, namespace):
    """
    Import module `name` from the 'ytdlp_plugins' directory (next to the
    executable path) and register every attribute whose name ends with
    `suffix` into the `namespace` dict, skipping names already present.

    Returns the list of newly registered classes; returns [] when the
    plugin module cannot be found.
    """
    plugin_info = [None]
    classes = []
    try:
        # NOTE(review): the deprecated `imp` module is used here; it was
        # removed in Python 3.12 (importlib is the modern replacement)
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        # NOTE(review): this loop variable shadows the `name` parameter
        for name in dir(plugins):
            if name in namespace:
                continue
            if not name.endswith(suffix):
                continue
            klass = getattr(plugins, name)
            classes.append(klass)
            namespace[name] = klass
    except ImportError:
        # Missing plugin module is not an error
        pass
    finally:
        # imp.find_module returns an open file object as its first element
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
6237
6238
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` (nonlocal) records the deepest branching level reached, so
        # the caller knows how many nested result lists need to be flattened
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # A tuple/list of alternative keys: traverse each, then treat
                # the collected results like a "..." expansion
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Branch into every value of the current object
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Dict lookup; case-insensitive fallback scans all items
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert user-supplied strings into an int index or slice
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # A bare ':' behaves like "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Normalize expected_type into a filtering callable
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Flatten all but the outermost branching level, dropping Nones
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6321
6322
def traverse_dict(dictn, keys, casesense=True):
    '''Deprecated shim kept for backward compatibility. Do not use;
    call traverse_obj directly instead.'''
    return traverse_obj(
        dictn, keys,
        casesense=casesense, is_user_input=True, traverse_string=True)
6327
6328
def variadic(x, allowed_types=(str, bytes)):
    """Return x itself when it is an iterable container, otherwise wrap it
    in a 1-tuple. Instances of allowed_types (str/bytes by default) count
    as scalars rather than containers."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)