yt_dlp/utils.py

   1 #!/usr/bin/env python3
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import imp
  20 import io
  21 import itertools
  22 import json
  23 import locale
  24 import math
  25 import operator
  26 import os
  27 import platform
  28 import random
  29 import re
  30 import socket
  31 import ssl
  32 import subprocess
  33 import sys
  34 import tempfile
  35 import time
  36 import traceback
  37 import xml.etree.ElementTree
  38 import zlib
  39
  40 from .compat import (
  41     compat_HTMLParseError,
  42     compat_HTMLParser,
  43     compat_HTTPError,
  44     compat_basestring,
  45     compat_chr,
  46     compat_cookiejar,
  47     compat_ctypes_WINFUNCTYPE,
  48     compat_etree_fromstring,
  49     compat_expanduser,
  50     compat_html_entities,
  51     compat_html_entities_html5,
  52     compat_http_client,
  53     compat_integer_types,
  54     compat_numeric_types,
  55     compat_kwargs,
  56     compat_os_name,
  57     compat_parse_qs,
  58     compat_shlex_quote,
  59     compat_str,
  60     compat_struct_pack,
  61     compat_struct_unpack,
  62     compat_urllib_error,
  63     compat_urllib_parse,
  64     compat_urllib_parse_urlencode,
  65     compat_urllib_parse_urlparse,
  66     compat_urllib_parse_urlunparse,
  67     compat_urllib_parse_quote,
  68     compat_urllib_parse_quote_plus,
  69     compat_urllib_parse_unquote_plus,
  70     compat_urllib_request,
  71     compat_urlparse,
  72     compat_xpath,
  73 )
  74
  75 from .socks import (
  76     ProxyType,
  77     sockssocket,
  78 )
  79
  80
  81 def register_socks_protocols():
  82     # "Register" SOCKS protocols
  83     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  84     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  85     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  86         if scheme not in compat_urlparse.uses_netloc:
  87             compat_urlparse.uses_netloc.append(scheme)
  88
  89
  90 # This is not clearly defined otherwise
  91 compiled_regex_type = type(re.compile(''))
  92
  93
  94 def random_user_agent():
  95     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  96     _CHROME_VERSIONS = (
  97         '74.0.3729.129',
  98         '76.0.3780.3',
  99         '76.0.3780.2',
 100         '74.0.3729.128',
 101         '76.0.3780.1',
 102         '76.0.3780.0',
 103         '75.0.3770.15',
 104         '74.0.3729.127',
 105         '74.0.3729.126',
 106         '76.0.3779.1',
 107         '76.0.3779.0',
 108         '75.0.3770.14',
 109         '74.0.3729.125',
 110         '76.0.3778.1',
 111         '76.0.3778.0',
 112         '75.0.3770.13',
 113         '74.0.3729.124',
 114         '74.0.3729.123',
 115         '73.0.3683.121',
 116         '76.0.3777.1',
 117         '76.0.3777.0',
 118         '75.0.3770.12',
 119         '74.0.3729.122',
 120         '76.0.3776.4',
 121         '75.0.3770.11',
 122         '74.0.3729.121',
 123         '76.0.3776.3',
 124         '76.0.3776.2',
 125         '73.0.3683.120',
 126         '74.0.3729.120',
 127         '74.0.3729.119',
 128         '74.0.3729.118',
 129         '76.0.3776.1',
 130         '76.0.3776.0',
 131         '76.0.3775.5',
 132         '75.0.3770.10',
 133         '74.0.3729.117',
 134         '76.0.3775.4',
 135         '76.0.3775.3',
 136         '74.0.3729.116',
 137         '75.0.3770.9',
 138         '76.0.3775.2',
 139         '76.0.3775.1',
 140         '76.0.3775.0',
 141         '75.0.3770.8',
 142         '74.0.3729.115',
 143         '74.0.3729.114',
 144         '76.0.3774.1',
 145         '76.0.3774.0',
 146         '75.0.3770.7',
 147         '74.0.3729.113',
 148         '74.0.3729.112',
 149         '74.0.3729.111',
 150         '76.0.3773.1',
 151         '76.0.3773.0',
 152         '75.0.3770.6',
 153         '74.0.3729.110',
 154         '74.0.3729.109',
 155         '76.0.3772.1',
 156         '76.0.3772.0',
 157         '75.0.3770.5',
 158         '74.0.3729.108',
 159         '74.0.3729.107',
 160         '76.0.3771.1',
 161         '76.0.3771.0',
 162         '75.0.3770.4',
 163         '74.0.3729.106',
 164         '74.0.3729.105',
 165         '75.0.3770.3',
 166         '74.0.3729.104',
 167         '74.0.3729.103',
 168         '74.0.3729.102',
 169         '75.0.3770.2',
 170         '74.0.3729.101',
 171         '75.0.3770.1',
 172         '75.0.3770.0',
 173         '74.0.3729.100',
 174         '75.0.3769.5',
 175         '75.0.3769.4',
 176         '74.0.3729.99',
 177         '75.0.3769.3',
 178         '75.0.3769.2',
 179         '75.0.3768.6',
 180         '74.0.3729.98',
 181         '75.0.3769.1',
 182         '75.0.3769.0',
 183         '74.0.3729.97',
 184         '73.0.3683.119',
 185         '73.0.3683.118',
 186         '74.0.3729.96',
 187         '75.0.3768.5',
 188         '75.0.3768.4',
 189         '75.0.3768.3',
 190         '75.0.3768.2',
 191         '74.0.3729.95',
 192         '74.0.3729.94',
 193         '75.0.3768.1',
 194         '75.0.3768.0',
 195         '74.0.3729.93',
 196         '74.0.3729.92',
 197         '73.0.3683.117',
 198         '74.0.3729.91',
 199         '75.0.3766.3',
 200         '74.0.3729.90',
 201         '75.0.3767.2',
 202         '75.0.3767.1',
 203         '75.0.3767.0',
 204         '74.0.3729.89',
 205         '73.0.3683.116',
 206         '75.0.3766.2',
 207         '74.0.3729.88',
 208         '75.0.3766.1',
 209         '75.0.3766.0',
 210         '74.0.3729.87',
 211         '73.0.3683.115',
 212         '74.0.3729.86',
 213         '75.0.3765.1',
 214         '75.0.3765.0',
 215         '74.0.3729.85',
 216         '73.0.3683.114',
 217         '74.0.3729.84',
 218         '75.0.3764.1',
 219         '75.0.3764.0',
 220         '74.0.3729.83',
 221         '73.0.3683.113',
 222         '75.0.3763.2',
 223         '75.0.3761.4',
 224         '74.0.3729.82',
 225         '75.0.3763.1',
 226         '75.0.3763.0',
 227         '74.0.3729.81',
 228         '73.0.3683.112',
 229         '75.0.3762.1',
 230         '75.0.3762.0',
 231         '74.0.3729.80',
 232         '75.0.3761.3',
 233         '74.0.3729.79',
 234         '73.0.3683.111',
 235         '75.0.3761.2',
 236         '74.0.3729.78',
 237         '74.0.3729.77',
 238         '75.0.3761.1',
 239         '75.0.3761.0',
 240         '73.0.3683.110',
 241         '74.0.3729.76',
 242         '74.0.3729.75',
 243         '75.0.3760.0',
 244         '74.0.3729.74',
 245         '75.0.3759.8',
 246         '75.0.3759.7',
 247         '75.0.3759.6',
 248         '74.0.3729.73',
 249         '75.0.3759.5',
 250         '74.0.3729.72',
 251         '73.0.3683.109',
 252         '75.0.3759.4',
 253         '75.0.3759.3',
 254         '74.0.3729.71',
 255         '75.0.3759.2',
 256         '74.0.3729.70',
 257         '73.0.3683.108',
 258         '74.0.3729.69',
 259         '75.0.3759.1',
 260         '75.0.3759.0',
 261         '74.0.3729.68',
 262         '73.0.3683.107',
 263         '74.0.3729.67',
 264         '75.0.3758.1',
 265         '75.0.3758.0',
 266         '74.0.3729.66',
 267         '73.0.3683.106',
 268         '74.0.3729.65',
 269         '75.0.3757.1',
 270         '75.0.3757.0',
 271         '74.0.3729.64',
 272         '73.0.3683.105',
 273         '74.0.3729.63',
 274         '75.0.3756.1',
 275         '75.0.3756.0',
 276         '74.0.3729.62',
 277         '73.0.3683.104',
 278         '75.0.3755.3',
 279         '75.0.3755.2',
 280         '73.0.3683.103',
 281         '75.0.3755.1',
 282         '75.0.3755.0',
 283         '74.0.3729.61',
 284         '73.0.3683.102',
 285         '74.0.3729.60',
 286         '75.0.3754.2',
 287         '74.0.3729.59',
 288         '75.0.3753.4',
 289         '74.0.3729.58',
 290         '75.0.3754.1',
 291         '75.0.3754.0',
 292         '74.0.3729.57',
 293         '73.0.3683.101',
 294         '75.0.3753.3',
 295         '75.0.3752.2',
 296         '75.0.3753.2',
 297         '74.0.3729.56',
 298         '75.0.3753.1',
 299         '75.0.3753.0',
 300         '74.0.3729.55',
 301         '73.0.3683.100',
 302         '74.0.3729.54',
 303         '75.0.3752.1',
 304         '75.0.3752.0',
 305         '74.0.3729.53',
 306         '73.0.3683.99',
 307         '74.0.3729.52',
 308         '75.0.3751.1',
 309         '75.0.3751.0',
 310         '74.0.3729.51',
 311         '73.0.3683.98',
 312         '74.0.3729.50',
 313         '75.0.3750.0',
 314         '74.0.3729.49',
 315         '74.0.3729.48',
 316         '74.0.3729.47',
 317         '75.0.3749.3',
 318         '74.0.3729.46',
 319         '73.0.3683.97',
 320         '75.0.3749.2',
 321         '74.0.3729.45',
 322         '75.0.3749.1',
 323         '75.0.3749.0',
 324         '74.0.3729.44',
 325         '73.0.3683.96',
 326         '74.0.3729.43',
 327         '74.0.3729.42',
 328         '75.0.3748.1',
 329         '75.0.3748.0',
 330         '74.0.3729.41',
 331         '75.0.3747.1',
 332         '73.0.3683.95',
 333         '75.0.3746.4',
 334         '74.0.3729.40',
 335         '74.0.3729.39',
 336         '75.0.3747.0',
 337         '75.0.3746.3',
 338         '75.0.3746.2',
 339         '74.0.3729.38',
 340         '75.0.3746.1',
 341         '75.0.3746.0',
 342         '74.0.3729.37',
 343         '73.0.3683.94',
 344         '75.0.3745.5',
 345         '75.0.3745.4',
 346         '75.0.3745.3',
 347         '75.0.3745.2',
 348         '74.0.3729.36',
 349         '75.0.3745.1',
 350         '75.0.3745.0',
 351         '75.0.3744.2',
 352         '74.0.3729.35',
 353         '73.0.3683.93',
 354         '74.0.3729.34',
 355         '75.0.3744.1',
 356         '75.0.3744.0',
 357         '74.0.3729.33',
 358         '73.0.3683.92',
 359         '74.0.3729.32',
 360         '74.0.3729.31',
 361         '73.0.3683.91',
 362         '75.0.3741.2',
 363         '75.0.3740.5',
 364         '74.0.3729.30',
 365         '75.0.3741.1',
 366         '75.0.3741.0',
 367         '74.0.3729.29',
 368         '75.0.3740.4',
 369         '73.0.3683.90',
 370         '74.0.3729.28',
 371         '75.0.3740.3',
 372         '73.0.3683.89',
 373         '75.0.3740.2',
 374         '74.0.3729.27',
 375         '75.0.3740.1',
 376         '75.0.3740.0',
 377         '74.0.3729.26',
 378         '73.0.3683.88',
 379         '73.0.3683.87',
 380         '74.0.3729.25',
 381         '75.0.3739.1',
 382         '75.0.3739.0',
 383         '73.0.3683.86',
 384         '74.0.3729.24',
 385         '73.0.3683.85',
 386         '75.0.3738.4',
 387         '75.0.3738.3',
 388         '75.0.3738.2',
 389         '75.0.3738.1',
 390         '75.0.3738.0',
 391         '74.0.3729.23',
 392         '73.0.3683.84',
 393         '74.0.3729.22',
 394         '74.0.3729.21',
 395         '75.0.3737.1',
 396         '75.0.3737.0',
 397         '74.0.3729.20',
 398         '73.0.3683.83',
 399         '74.0.3729.19',
 400         '75.0.3736.1',
 401         '75.0.3736.0',
 402         '74.0.3729.18',
 403         '73.0.3683.82',
 404         '74.0.3729.17',
 405         '75.0.3735.1',
 406         '75.0.3735.0',
 407         '74.0.3729.16',
 408         '73.0.3683.81',
 409         '75.0.3734.1',
 410         '75.0.3734.0',
 411         '74.0.3729.15',
 412         '73.0.3683.80',
 413         '74.0.3729.14',
 414         '75.0.3733.1',
 415         '75.0.3733.0',
 416         '75.0.3732.1',
 417         '74.0.3729.13',
 418         '74.0.3729.12',
 419         '73.0.3683.79',
 420         '74.0.3729.11',
 421         '75.0.3732.0',
 422         '74.0.3729.10',
 423         '73.0.3683.78',
 424         '74.0.3729.9',
 425         '74.0.3729.8',
 426         '74.0.3729.7',
 427         '75.0.3731.3',
 428         '75.0.3731.2',
 429         '75.0.3731.0',
 430         '74.0.3729.6',
 431         '73.0.3683.77',
 432         '73.0.3683.76',
 433         '75.0.3730.5',
 434         '75.0.3730.4',
 435         '73.0.3683.75',
 436         '74.0.3729.5',
 437         '73.0.3683.74',
 438         '75.0.3730.3',
 439         '75.0.3730.2',
 440         '74.0.3729.4',
 441         '73.0.3683.73',
 442         '73.0.3683.72',
 443         '75.0.3730.1',
 444         '75.0.3730.0',
 445         '74.0.3729.3',
 446         '73.0.3683.71',
 447         '74.0.3729.2',
 448         '73.0.3683.70',
 449         '74.0.3729.1',
 450         '74.0.3729.0',
 451         '74.0.3726.4',
 452         '73.0.3683.69',
 453         '74.0.3726.3',
 454         '74.0.3728.0',
 455         '74.0.3726.2',
 456         '73.0.3683.68',
 457         '74.0.3726.1',
 458         '74.0.3726.0',
 459         '74.0.3725.4',
 460         '73.0.3683.67',
 461         '73.0.3683.66',
 462         '74.0.3725.3',
 463         '74.0.3725.2',
 464         '74.0.3725.1',
 465         '74.0.3724.8',
 466         '74.0.3725.0',
 467         '73.0.3683.65',
 468         '74.0.3724.7',
 469         '74.0.3724.6',
 470         '74.0.3724.5',
 471         '74.0.3724.4',
 472         '74.0.3724.3',
 473         '74.0.3724.2',
 474         '74.0.3724.1',
 475         '74.0.3724.0',
 476         '73.0.3683.64',
 477         '74.0.3723.1',
 478         '74.0.3723.0',
 479         '73.0.3683.63',
 480         '74.0.3722.1',
 481         '74.0.3722.0',
 482         '73.0.3683.62',
 483         '74.0.3718.9',
 484         '74.0.3702.3',
 485         '74.0.3721.3',
 486         '74.0.3721.2',
 487         '74.0.3721.1',
 488         '74.0.3721.0',
 489         '74.0.3720.6',
 490         '73.0.3683.61',
 491         '72.0.3626.122',
 492         '73.0.3683.60',
 493         '74.0.3720.5',
 494         '72.0.3626.121',
 495         '74.0.3718.8',
 496         '74.0.3720.4',
 497         '74.0.3720.3',
 498         '74.0.3718.7',
 499         '74.0.3720.2',
 500         '74.0.3720.1',
 501         '74.0.3720.0',
 502         '74.0.3718.6',
 503         '74.0.3719.5',
 504         '73.0.3683.59',
 505         '74.0.3718.5',
 506         '74.0.3718.4',
 507         '74.0.3719.4',
 508         '74.0.3719.3',
 509         '74.0.3719.2',
 510         '74.0.3719.1',
 511         '73.0.3683.58',
 512         '74.0.3719.0',
 513         '73.0.3683.57',
 514         '73.0.3683.56',
 515         '74.0.3718.3',
 516         '73.0.3683.55',
 517         '74.0.3718.2',
 518         '74.0.3718.1',
 519         '74.0.3718.0',
 520         '73.0.3683.54',
 521         '74.0.3717.2',
 522         '73.0.3683.53',
 523         '74.0.3717.1',
 524         '74.0.3717.0',
 525         '73.0.3683.52',
 526         '74.0.3716.1',
 527         '74.0.3716.0',
 528         '73.0.3683.51',
 529         '74.0.3715.1',
 530         '74.0.3715.0',
 531         '73.0.3683.50',
 532         '74.0.3711.2',
 533         '74.0.3714.2',
 534         '74.0.3713.3',
 535         '74.0.3714.1',
 536         '74.0.3714.0',
 537         '73.0.3683.49',
 538         '74.0.3713.1',
 539         '74.0.3713.0',
 540         '72.0.3626.120',
 541         '73.0.3683.48',
 542         '74.0.3712.2',
 543         '74.0.3712.1',
 544         '74.0.3712.0',
 545         '73.0.3683.47',
 546         '72.0.3626.119',
 547         '73.0.3683.46',
 548         '74.0.3710.2',
 549         '72.0.3626.118',
 550         '74.0.3711.1',
 551         '74.0.3711.0',
 552         '73.0.3683.45',
 553         '72.0.3626.117',
 554         '74.0.3710.1',
 555         '74.0.3710.0',
 556         '73.0.3683.44',
 557         '72.0.3626.116',
 558         '74.0.3709.1',
 559         '74.0.3709.0',
 560         '74.0.3704.9',
 561         '73.0.3683.43',
 562         '72.0.3626.115',
 563         '74.0.3704.8',
 564         '74.0.3704.7',
 565         '74.0.3708.0',
 566         '74.0.3706.7',
 567         '74.0.3704.6',
 568         '73.0.3683.42',
 569         '72.0.3626.114',
 570         '74.0.3706.6',
 571         '72.0.3626.113',
 572         '74.0.3704.5',
 573         '74.0.3706.5',
 574         '74.0.3706.4',
 575         '74.0.3706.3',
 576         '74.0.3706.2',
 577         '74.0.3706.1',
 578         '74.0.3706.0',
 579         '73.0.3683.41',
 580         '72.0.3626.112',
 581         '74.0.3705.1',
 582         '74.0.3705.0',
 583         '73.0.3683.40',
 584         '72.0.3626.111',
 585         '73.0.3683.39',
 586         '74.0.3704.4',
 587         '73.0.3683.38',
 588         '74.0.3704.3',
 589         '74.0.3704.2',
 590         '74.0.3704.1',
 591         '74.0.3704.0',
 592         '73.0.3683.37',
 593         '72.0.3626.110',
 594         '72.0.3626.109',
 595         '74.0.3703.3',
 596         '74.0.3703.2',
 597         '73.0.3683.36',
 598         '74.0.3703.1',
 599         '74.0.3703.0',
 600         '73.0.3683.35',
 601         '72.0.3626.108',
 602         '74.0.3702.2',
 603         '74.0.3699.3',
 604         '74.0.3702.1',
 605         '74.0.3702.0',
 606         '73.0.3683.34',
 607         '72.0.3626.107',
 608         '73.0.3683.33',
 609         '74.0.3701.1',
 610         '74.0.3701.0',
 611         '73.0.3683.32',
 612         '73.0.3683.31',
 613         '72.0.3626.105',
 614         '74.0.3700.1',
 615         '74.0.3700.0',
 616         '73.0.3683.29',
 617         '72.0.3626.103',
 618         '74.0.3699.2',
 619         '74.0.3699.1',
 620         '74.0.3699.0',
 621         '73.0.3683.28',
 622         '72.0.3626.102',
 623         '73.0.3683.27',
 624         '73.0.3683.26',
 625         '74.0.3698.0',
 626         '74.0.3696.2',
 627         '72.0.3626.101',
 628         '73.0.3683.25',
 629         '74.0.3696.1',
 630         '74.0.3696.0',
 631         '74.0.3694.8',
 632         '72.0.3626.100',
 633         '74.0.3694.7',
 634         '74.0.3694.6',
 635         '74.0.3694.5',
 636         '74.0.3694.4',
 637         '72.0.3626.99',
 638         '72.0.3626.98',
 639         '74.0.3694.3',
 640         '73.0.3683.24',
 641         '72.0.3626.97',
 642         '72.0.3626.96',
 643         '72.0.3626.95',
 644         '73.0.3683.23',
 645         '72.0.3626.94',
 646         '73.0.3683.22',
 647         '73.0.3683.21',
 648         '72.0.3626.93',
 649         '74.0.3694.2',
 650         '72.0.3626.92',
 651         '74.0.3694.1',
 652         '74.0.3694.0',
 653         '74.0.3693.6',
 654         '73.0.3683.20',
 655         '72.0.3626.91',
 656         '74.0.3693.5',
 657         '74.0.3693.4',
 658         '74.0.3693.3',
 659         '74.0.3693.2',
 660         '73.0.3683.19',
 661         '74.0.3693.1',
 662         '74.0.3693.0',
 663         '73.0.3683.18',
 664         '72.0.3626.90',
 665         '74.0.3692.1',
 666         '74.0.3692.0',
 667         '73.0.3683.17',
 668         '72.0.3626.89',
 669         '74.0.3687.3',
 670         '74.0.3691.1',
 671         '74.0.3691.0',
 672         '73.0.3683.16',
 673         '72.0.3626.88',
 674         '72.0.3626.87',
 675         '73.0.3683.15',
 676         '74.0.3690.1',
 677         '74.0.3690.0',
 678         '73.0.3683.14',
 679         '72.0.3626.86',
 680         '73.0.3683.13',
 681         '73.0.3683.12',
 682         '74.0.3689.1',
 683         '74.0.3689.0',
 684         '73.0.3683.11',
 685         '72.0.3626.85',
 686         '73.0.3683.10',
 687         '72.0.3626.84',
 688         '73.0.3683.9',
 689         '74.0.3688.1',
 690         '74.0.3688.0',
 691         '73.0.3683.8',
 692         '72.0.3626.83',
 693         '74.0.3687.2',
 694         '74.0.3687.1',
 695         '74.0.3687.0',
 696         '73.0.3683.7',
 697         '72.0.3626.82',
 698         '74.0.3686.4',
 699         '72.0.3626.81',
 700         '74.0.3686.3',
 701         '74.0.3686.2',
 702         '74.0.3686.1',
 703         '74.0.3686.0',
 704         '73.0.3683.6',
 705         '72.0.3626.80',
 706         '74.0.3685.1',
 707         '74.0.3685.0',
 708         '73.0.3683.5',
 709         '72.0.3626.79',
 710         '74.0.3684.1',
 711         '74.0.3684.0',
 712         '73.0.3683.4',
 713         '72.0.3626.78',
 714         '72.0.3626.77',
 715         '73.0.3683.3',
 716         '73.0.3683.2',
 717         '72.0.3626.76',
 718         '73.0.3683.1',
 719         '73.0.3683.0',
 720         '72.0.3626.75',
 721         '71.0.3578.141',
 722         '73.0.3682.1',
 723         '73.0.3682.0',
 724         '72.0.3626.74',
 725         '71.0.3578.140',
 726         '73.0.3681.4',
 727         '73.0.3681.3',
 728         '73.0.3681.2',
 729         '73.0.3681.1',
 730         '73.0.3681.0',
 731         '72.0.3626.73',
 732         '71.0.3578.139',
 733         '72.0.3626.72',
 734         '72.0.3626.71',
 735         '73.0.3680.1',
 736         '73.0.3680.0',
 737         '72.0.3626.70',
 738         '71.0.3578.138',
 739         '73.0.3678.2',
 740         '73.0.3679.1',
 741         '73.0.3679.0',
 742         '72.0.3626.69',
 743         '71.0.3578.137',
 744         '73.0.3678.1',
 745         '73.0.3678.0',
 746         '71.0.3578.136',
 747         '73.0.3677.1',
 748         '73.0.3677.0',
 749         '72.0.3626.68',
 750         '72.0.3626.67',
 751         '71.0.3578.135',
 752         '73.0.3676.1',
 753         '73.0.3676.0',
 754         '73.0.3674.2',
 755         '72.0.3626.66',
 756         '71.0.3578.134',
 757         '73.0.3674.1',
 758         '73.0.3674.0',
 759         '72.0.3626.65',
 760         '71.0.3578.133',
 761         '73.0.3673.2',
 762         '73.0.3673.1',
 763         '73.0.3673.0',
 764         '72.0.3626.64',
 765         '71.0.3578.132',
 766         '72.0.3626.63',
 767         '72.0.3626.62',
 768         '72.0.3626.61',
 769         '72.0.3626.60',
 770         '73.0.3672.1',
 771         '73.0.3672.0',
 772         '72.0.3626.59',
 773         '71.0.3578.131',
 774         '73.0.3671.3',
 775         '73.0.3671.2',
 776         '73.0.3671.1',
 777         '73.0.3671.0',
 778         '72.0.3626.58',
 779         '71.0.3578.130',
 780         '73.0.3670.1',
 781         '73.0.3670.0',
 782         '72.0.3626.57',
 783         '71.0.3578.129',
 784         '73.0.3669.1',
 785         '73.0.3669.0',
 786         '72.0.3626.56',
 787         '71.0.3578.128',
 788         '73.0.3668.2',
 789         '73.0.3668.1',
 790         '73.0.3668.0',
 791         '72.0.3626.55',
 792         '71.0.3578.127',
 793         '73.0.3667.2',
 794         '73.0.3667.1',
 795         '73.0.3667.0',
 796         '72.0.3626.54',
 797         '71.0.3578.126',
 798         '73.0.3666.1',
 799         '73.0.3666.0',
 800         '72.0.3626.53',
 801         '71.0.3578.125',
 802         '73.0.3665.4',
 803         '73.0.3665.3',
 804         '72.0.3626.52',
 805         '73.0.3665.2',
 806         '73.0.3664.4',
 807         '73.0.3665.1',
 808         '73.0.3665.0',
 809         '72.0.3626.51',
 810         '71.0.3578.124',
 811         '72.0.3626.50',
 812         '73.0.3664.3',
 813         '73.0.3664.2',
 814         '73.0.3664.1',
 815         '73.0.3664.0',
 816         '73.0.3663.2',
 817         '72.0.3626.49',
 818         '71.0.3578.123',
 819         '73.0.3663.1',
 820         '73.0.3663.0',
 821         '72.0.3626.48',
 822         '71.0.3578.122',
 823         '73.0.3662.1',
 824         '73.0.3662.0',
 825         '72.0.3626.47',
 826         '71.0.3578.121',
 827         '73.0.3661.1',
 828         '72.0.3626.46',
 829         '73.0.3661.0',
 830         '72.0.3626.45',
 831         '71.0.3578.120',
 832         '73.0.3660.2',
 833         '73.0.3660.1',
 834         '73.0.3660.0',
 835         '72.0.3626.44',
 836         '71.0.3578.119',
 837         '73.0.3659.1',
 838         '73.0.3659.0',
 839         '72.0.3626.43',
 840         '71.0.3578.118',
 841         '73.0.3658.1',
 842         '73.0.3658.0',
 843         '72.0.3626.42',
 844         '71.0.3578.117',
 845         '73.0.3657.1',
 846         '73.0.3657.0',
 847         '72.0.3626.41',
 848         '71.0.3578.116',
 849         '73.0.3656.1',
 850         '73.0.3656.0',
 851         '72.0.3626.40',
 852         '71.0.3578.115',
 853         '73.0.3655.1',
 854         '73.0.3655.0',
 855         '72.0.3626.39',
 856         '71.0.3578.114',
 857         '73.0.3654.1',
 858         '73.0.3654.0',
 859         '72.0.3626.38',
 860         '71.0.3578.113',
 861         '73.0.3653.1',
 862         '73.0.3653.0',
 863         '72.0.3626.37',
 864         '71.0.3578.112',
 865         '73.0.3652.1',
 866         '73.0.3652.0',
 867         '72.0.3626.36',
 868         '71.0.3578.111',
 869         '73.0.3651.1',
 870         '73.0.3651.0',
 871         '72.0.3626.35',
 872         '71.0.3578.110',
 873         '73.0.3650.1',
 874         '73.0.3650.0',
 875         '72.0.3626.34',
 876         '71.0.3578.109',
 877         '73.0.3649.1',
 878         '73.0.3649.0',
 879         '72.0.3626.33',
 880         '71.0.3578.108',
 881         '73.0.3648.2',
 882         '73.0.3648.1',
 883         '73.0.3648.0',
 884         '72.0.3626.32',
 885         '71.0.3578.107',
 886         '73.0.3647.2',
 887         '73.0.3647.1',
 888         '73.0.3647.0',
 889         '72.0.3626.31',
 890         '71.0.3578.106',
 891         '73.0.3635.3',
 892         '73.0.3646.2',
 893         '73.0.3646.1',
 894         '73.0.3646.0',
 895         '72.0.3626.30',
 896         '71.0.3578.105',
 897         '72.0.3626.29',
 898         '73.0.3645.2',
 899         '73.0.3645.1',
 900         '73.0.3645.0',
 901         '72.0.3626.28',
 902         '71.0.3578.104',
 903         '72.0.3626.27',
 904         '72.0.3626.26',
 905         '72.0.3626.25',
 906         '72.0.3626.24',
 907         '73.0.3644.0',
 908         '73.0.3643.2',
 909         '72.0.3626.23',
 910         '71.0.3578.103',
 911         '73.0.3643.1',
 912         '73.0.3643.0',
 913         '72.0.3626.22',
 914         '71.0.3578.102',
 915         '73.0.3642.1',
 916         '73.0.3642.0',
 917         '72.0.3626.21',
 918         '71.0.3578.101',
 919         '73.0.3641.1',
 920         '73.0.3641.0',
 921         '72.0.3626.20',
 922         '71.0.3578.100',
 923         '72.0.3626.19',
 924         '73.0.3640.1',
 925         '73.0.3640.0',
 926         '72.0.3626.18',
 927         '73.0.3639.1',
 928         '71.0.3578.99',
 929         '73.0.3639.0',
 930         '72.0.3626.17',
 931         '73.0.3638.2',
 932         '72.0.3626.16',
 933         '73.0.3638.1',
 934         '73.0.3638.0',
 935         '72.0.3626.15',
 936         '71.0.3578.98',
 937         '73.0.3635.2',
 938         '71.0.3578.97',
 939         '73.0.3637.1',
 940         '73.0.3637.0',
 941         '72.0.3626.14',
 942         '71.0.3578.96',
 943         '71.0.3578.95',
 944         '72.0.3626.13',
 945         '71.0.3578.94',
 946         '73.0.3636.2',
 947         '71.0.3578.93',
 948         '73.0.3636.1',
 949         '73.0.3636.0',
 950         '72.0.3626.12',
 951         '71.0.3578.92',
 952         '73.0.3635.1',
 953         '73.0.3635.0',
 954         '72.0.3626.11',
 955         '71.0.3578.91',
 956         '73.0.3634.2',
 957         '73.0.3634.1',
 958         '73.0.3634.0',
 959         '72.0.3626.10',
 960         '71.0.3578.90',
 961         '71.0.3578.89',
 962         '73.0.3633.2',
 963         '73.0.3633.1',
 964         '73.0.3633.0',
 965         '72.0.3610.4',
 966         '72.0.3626.9',
 967         '71.0.3578.88',
 968         '73.0.3632.5',
 969         '73.0.3632.4',
 970         '73.0.3632.3',
 971         '73.0.3632.2',
 972         '73.0.3632.1',
 973         '73.0.3632.0',
 974         '72.0.3626.8',
 975         '71.0.3578.87',
 976         '73.0.3631.2',
 977         '73.0.3631.1',
 978         '73.0.3631.0',
 979         '72.0.3626.7',
 980         '71.0.3578.86',
 981         '72.0.3626.6',
 982         '73.0.3630.1',
 983         '73.0.3630.0',
 984         '72.0.3626.5',
 985         '71.0.3578.85',
 986         '72.0.3626.4',
 987         '73.0.3628.3',
 988         '73.0.3628.2',
 989         '73.0.3629.1',
 990         '73.0.3629.0',
 991         '72.0.3626.3',
 992         '71.0.3578.84',
 993         '73.0.3628.1',
 994         '73.0.3628.0',
 995         '71.0.3578.83',
 996         '73.0.3627.1',
 997         '73.0.3627.0',
 998         '72.0.3626.2',
 999         '71.0.3578.82',
1000         '71.0.3578.81',
1001         '71.0.3578.80',
1002         '72.0.3626.1',
1003         '72.0.3626.0',
1004         '71.0.3578.79',
1005         '70.0.3538.124',
1006         '71.0.3578.78',
1007         '72.0.3623.4',
1008         '72.0.3625.2',
1009         '72.0.3625.1',
1010         '72.0.3625.0',
1011         '71.0.3578.77',
1012         '70.0.3538.123',
1013         '72.0.3624.4',
1014         '72.0.3624.3',
1015         '72.0.3624.2',
1016         '71.0.3578.76',
1017         '72.0.3624.1',
1018         '72.0.3624.0',
1019         '72.0.3623.3',
1020         '71.0.3578.75',
1021         '70.0.3538.122',
1022         '71.0.3578.74',
1023         '72.0.3623.2',
1024         '72.0.3610.3',
1025         '72.0.3623.1',
1026         '72.0.3623.0',
1027         '72.0.3622.3',
1028         '72.0.3622.2',
1029         '71.0.3578.73',
1030         '70.0.3538.121',
1031         '72.0.3622.1',
1032         '72.0.3622.0',
1033         '71.0.3578.72',
1034         '70.0.3538.120',
1035         '72.0.3621.1',
1036         '72.0.3621.0',
1037         '71.0.3578.71',
1038         '70.0.3538.119',
1039         '72.0.3620.1',
1040         '72.0.3620.0',
1041         '71.0.3578.70',
1042         '70.0.3538.118',
1043         '71.0.3578.69',
1044         '72.0.3619.1',
1045         '72.0.3619.0',
1046         '71.0.3578.68',
1047         '70.0.3538.117',
1048         '71.0.3578.67',
1049         '72.0.3618.1',
1050         '72.0.3618.0',
1051         '71.0.3578.66',
1052         '70.0.3538.116',
1053         '72.0.3617.1',
1054         '72.0.3617.0',
1055         '71.0.3578.65',
1056         '70.0.3538.115',
1057         '72.0.3602.3',
1058         '71.0.3578.64',
1059         '72.0.3616.1',
1060         '72.0.3616.0',
1061         '71.0.3578.63',
1062         '70.0.3538.114',
1063         '71.0.3578.62',
1064         '72.0.3615.1',
1065         '72.0.3615.0',
1066         '71.0.3578.61',
1067         '70.0.3538.113',
1068         '72.0.3614.1',
1069         '72.0.3614.0',
1070         '71.0.3578.60',
1071         '70.0.3538.112',
1072         '72.0.3613.1',
1073         '72.0.3613.0',
1074         '71.0.3578.59',
1075         '70.0.3538.111',
1076         '72.0.3612.2',
1077         '72.0.3612.1',
1078         '72.0.3612.0',
1079         '70.0.3538.110',
1080         '71.0.3578.58',
1081         '70.0.3538.109',
1082         '72.0.3611.2',
1083         '72.0.3611.1',
1084         '72.0.3611.0',
1085         '71.0.3578.57',
1086         '70.0.3538.108',
1087         '72.0.3610.2',
1088         '71.0.3578.56',
1089         '71.0.3578.55',
1090         '72.0.3610.1',
1091         '72.0.3610.0',
1092         '71.0.3578.54',
1093         '70.0.3538.107',
1094         '71.0.3578.53',
1095         '72.0.3609.3',
1096         '71.0.3578.52',
1097         '72.0.3609.2',
1098         '71.0.3578.51',
1099         '72.0.3608.5',
1100         '72.0.3609.1',
1101         '72.0.3609.0',
1102         '71.0.3578.50',
1103         '70.0.3538.106',
1104         '72.0.3608.4',
1105         '72.0.3608.3',
1106         '72.0.3608.2',
1107         '71.0.3578.49',
1108         '72.0.3608.1',
1109         '72.0.3608.0',
1110         '70.0.3538.105',
1111         '71.0.3578.48',
1112         '72.0.3607.1',
1113         '72.0.3607.0',
1114         '71.0.3578.47',
1115         '70.0.3538.104',
1116         '72.0.3606.2',
1117         '72.0.3606.1',
1118         '72.0.3606.0',
1119         '71.0.3578.46',
1120         '70.0.3538.103',
1121         '70.0.3538.102',
1122         '72.0.3605.3',
1123         '72.0.3605.2',
1124         '72.0.3605.1',
1125         '72.0.3605.0',
1126         '71.0.3578.45',
1127         '70.0.3538.101',
1128         '71.0.3578.44',
1129         '71.0.3578.43',
1130         '70.0.3538.100',
1131         '70.0.3538.99',
1132         '71.0.3578.42',
1133         '72.0.3604.1',
1134         '72.0.3604.0',
1135         '71.0.3578.41',
1136         '70.0.3538.98',
1137         '71.0.3578.40',
1138         '72.0.3603.2',
1139         '72.0.3603.1',
1140         '72.0.3603.0',
1141         '71.0.3578.39',
1142         '70.0.3538.97',
1143         '72.0.3602.2',
1144         '71.0.3578.38',
1145         '71.0.3578.37',
1146         '72.0.3602.1',
1147         '72.0.3602.0',
1148         '71.0.3578.36',
1149         '70.0.3538.96',
1150         '72.0.3601.1',
1151         '72.0.3601.0',
1152         '71.0.3578.35',
1153         '70.0.3538.95',
1154         '72.0.3600.1',
1155         '72.0.3600.0',
1156         '71.0.3578.34',
1157         '70.0.3538.94',
1158         '72.0.3599.3',
1159         '72.0.3599.2',
1160         '72.0.3599.1',
1161         '72.0.3599.0',
1162         '71.0.3578.33',
1163         '70.0.3538.93',
1164         '72.0.3598.1',
1165         '72.0.3598.0',
1166         '71.0.3578.32',
1167         '70.0.3538.87',
1168         '72.0.3597.1',
1169         '72.0.3597.0',
1170         '72.0.3596.2',
1171         '71.0.3578.31',
1172         '70.0.3538.86',
1173         '71.0.3578.30',
1174         '71.0.3578.29',
1175         '72.0.3596.1',
1176         '72.0.3596.0',
1177         '71.0.3578.28',
1178         '70.0.3538.85',
1179         '72.0.3595.2',
1180         '72.0.3591.3',
1181         '72.0.3595.1',
1182         '72.0.3595.0',
1183         '71.0.3578.27',
1184         '70.0.3538.84',
1185         '72.0.3594.1',
1186         '72.0.3594.0',
1187         '71.0.3578.26',
1188         '70.0.3538.83',
1189         '72.0.3593.2',
1190         '72.0.3593.1',
1191         '72.0.3593.0',
1192         '71.0.3578.25',
1193         '70.0.3538.82',
1194         '72.0.3589.3',
1195         '72.0.3592.2',
1196         '72.0.3592.1',
1197         '72.0.3592.0',
1198         '71.0.3578.24',
1199         '72.0.3589.2',
1200         '70.0.3538.81',
1201         '70.0.3538.80',
1202         '72.0.3591.2',
1203         '72.0.3591.1',
1204         '72.0.3591.0',
1205         '71.0.3578.23',
1206         '70.0.3538.79',
1207         '71.0.3578.22',
1208         '72.0.3590.1',
1209         '72.0.3590.0',
1210         '71.0.3578.21',
1211         '70.0.3538.78',
1212         '70.0.3538.77',
1213         '72.0.3589.1',
1214         '72.0.3589.0',
1215         '71.0.3578.20',
1216         '70.0.3538.76',
1217         '71.0.3578.19',
1218         '70.0.3538.75',
1219         '72.0.3588.1',
1220         '72.0.3588.0',
1221         '71.0.3578.18',
1222         '70.0.3538.74',
1223         '72.0.3586.2',
1224         '72.0.3587.0',
1225         '71.0.3578.17',
1226         '70.0.3538.73',
1227         '72.0.3586.1',
1228         '72.0.3586.0',
1229         '71.0.3578.16',
1230         '70.0.3538.72',
1231         '72.0.3585.1',
1232         '72.0.3585.0',
1233         '71.0.3578.15',
1234         '70.0.3538.71',
1235         '71.0.3578.14',
1236         '72.0.3584.1',
1237         '72.0.3584.0',
1238         '71.0.3578.13',
1239         '70.0.3538.70',
1240         '72.0.3583.2',
1241         '71.0.3578.12',
1242         '72.0.3583.1',
1243         '72.0.3583.0',
1244         '71.0.3578.11',
1245         '70.0.3538.69',
1246         '71.0.3578.10',
1247         '72.0.3582.0',
1248         '72.0.3581.4',
1249         '71.0.3578.9',
1250         '70.0.3538.67',
1251         '72.0.3581.3',
1252         '72.0.3581.2',
1253         '72.0.3581.1',
1254         '72.0.3581.0',
1255         '71.0.3578.8',
1256         '70.0.3538.66',
1257         '72.0.3580.1',
1258         '72.0.3580.0',
1259         '71.0.3578.7',
1260         '70.0.3538.65',
1261         '71.0.3578.6',
1262         '72.0.3579.1',
1263         '72.0.3579.0',
1264         '71.0.3578.5',
1265         '70.0.3538.64',
1266         '71.0.3578.4',
1267         '71.0.3578.3',
1268         '71.0.3578.2',
1269         '71.0.3578.1',
1270         '71.0.3578.0',
1271         '70.0.3538.63',
1272         '69.0.3497.128',
1273         '70.0.3538.62',
1274         '70.0.3538.61',
1275         '70.0.3538.60',
1276         '70.0.3538.59',
1277         '71.0.3577.1',
1278         '71.0.3577.0',
1279         '70.0.3538.58',
1280         '69.0.3497.127',
1281         '71.0.3576.2',
1282         '71.0.3576.1',
1283         '71.0.3576.0',
1284         '70.0.3538.57',
1285         '70.0.3538.56',
1286         '71.0.3575.2',
1287         '70.0.3538.55',
1288         '69.0.3497.126',
1289         '70.0.3538.54',
1290         '71.0.3575.1',
1291         '71.0.3575.0',
1292         '71.0.3574.1',
1293         '71.0.3574.0',
1294         '70.0.3538.53',
1295         '69.0.3497.125',
1296         '70.0.3538.52',
1297         '71.0.3573.1',
1298         '71.0.3573.0',
1299         '70.0.3538.51',
1300         '69.0.3497.124',
1301         '71.0.3572.1',
1302         '71.0.3572.0',
1303         '70.0.3538.50',
1304         '69.0.3497.123',
1305         '71.0.3571.2',
1306         '70.0.3538.49',
1307         '69.0.3497.122',
1308         '71.0.3571.1',
1309         '71.0.3571.0',
1310         '70.0.3538.48',
1311         '69.0.3497.121',
1312         '71.0.3570.1',
1313         '71.0.3570.0',
1314         '70.0.3538.47',
1315         '69.0.3497.120',
1316         '71.0.3568.2',
1317         '71.0.3569.1',
1318         '71.0.3569.0',
1319         '70.0.3538.46',
1320         '69.0.3497.119',
1321         '70.0.3538.45',
1322         '71.0.3568.1',
1323         '71.0.3568.0',
1324         '70.0.3538.44',
1325         '69.0.3497.118',
1326         '70.0.3538.43',
1327         '70.0.3538.42',
1328         '71.0.3567.1',
1329         '71.0.3567.0',
1330         '70.0.3538.41',
1331         '69.0.3497.117',
1332         '71.0.3566.1',
1333         '71.0.3566.0',
1334         '70.0.3538.40',
1335         '69.0.3497.116',
1336         '71.0.3565.1',
1337         '71.0.3565.0',
1338         '70.0.3538.39',
1339         '69.0.3497.115',
1340         '71.0.3564.1',
1341         '71.0.3564.0',
1342         '70.0.3538.38',
1343         '69.0.3497.114',
1344         '71.0.3563.0',
1345         '71.0.3562.2',
1346         '70.0.3538.37',
1347         '69.0.3497.113',
1348         '70.0.3538.36',
1349         '70.0.3538.35',
1350         '71.0.3562.1',
1351         '71.0.3562.0',
1352         '70.0.3538.34',
1353         '69.0.3497.112',
1354         '70.0.3538.33',
1355         '71.0.3561.1',
1356         '71.0.3561.0',
1357         '70.0.3538.32',
1358         '69.0.3497.111',
1359         '71.0.3559.6',
1360         '71.0.3560.1',
1361         '71.0.3560.0',
1362         '71.0.3559.5',
1363         '71.0.3559.4',
1364         '70.0.3538.31',
1365         '69.0.3497.110',
1366         '71.0.3559.3',
1367         '70.0.3538.30',
1368         '69.0.3497.109',
1369         '71.0.3559.2',
1370         '71.0.3559.1',
1371         '71.0.3559.0',
1372         '70.0.3538.29',
1373         '69.0.3497.108',
1374         '71.0.3558.2',
1375         '71.0.3558.1',
1376         '71.0.3558.0',
1377         '70.0.3538.28',
1378         '69.0.3497.107',
1379         '71.0.3557.2',
1380         '71.0.3557.1',
1381         '71.0.3557.0',
1382         '70.0.3538.27',
1383         '69.0.3497.106',
1384         '71.0.3554.4',
1385         '70.0.3538.26',
1386         '71.0.3556.1',
1387         '71.0.3556.0',
1388         '70.0.3538.25',
1389         '71.0.3554.3',
1390         '69.0.3497.105',
1391         '71.0.3554.2',
1392         '70.0.3538.24',
1393         '69.0.3497.104',
1394         '71.0.3555.2',
1395         '70.0.3538.23',
1396         '71.0.3555.1',
1397         '71.0.3555.0',
1398         '70.0.3538.22',
1399         '69.0.3497.103',
1400         '71.0.3554.1',
1401         '71.0.3554.0',
1402         '70.0.3538.21',
1403         '69.0.3497.102',
1404         '71.0.3553.3',
1405         '70.0.3538.20',
1406         '69.0.3497.101',
1407         '71.0.3553.2',
1408         '69.0.3497.100',
1409         '71.0.3553.1',
1410         '71.0.3553.0',
1411         '70.0.3538.19',
1412         '69.0.3497.99',
1413         '69.0.3497.98',
1414         '69.0.3497.97',
1415         '71.0.3552.6',
1416         '71.0.3552.5',
1417         '71.0.3552.4',
1418         '71.0.3552.3',
1419         '71.0.3552.2',
1420         '71.0.3552.1',
1421         '71.0.3552.0',
1422         '70.0.3538.18',
1423         '69.0.3497.96',
1424         '71.0.3551.3',
1425         '71.0.3551.2',
1426         '71.0.3551.1',
1427         '71.0.3551.0',
1428         '70.0.3538.17',
1429         '69.0.3497.95',
1430         '71.0.3550.3',
1431         '71.0.3550.2',
1432         '71.0.3550.1',
1433         '71.0.3550.0',
1434         '70.0.3538.16',
1435         '69.0.3497.94',
1436         '71.0.3549.1',
1437         '71.0.3549.0',
1438         '70.0.3538.15',
1439         '69.0.3497.93',
1440         '69.0.3497.92',
1441         '71.0.3548.1',
1442         '71.0.3548.0',
1443         '70.0.3538.14',
1444         '69.0.3497.91',
1445         '71.0.3547.1',
1446         '71.0.3547.0',
1447         '70.0.3538.13',
1448         '69.0.3497.90',
1449         '71.0.3546.2',
1450         '69.0.3497.89',
1451         '71.0.3546.1',
1452         '71.0.3546.0',
1453         '70.0.3538.12',
1454         '69.0.3497.88',
1455         '71.0.3545.4',
1456         '71.0.3545.3',
1457         '71.0.3545.2',
1458         '71.0.3545.1',
1459         '71.0.3545.0',
1460         '70.0.3538.11',
1461         '69.0.3497.87',
1462         '71.0.3544.5',
1463         '71.0.3544.4',
1464         '71.0.3544.3',
1465         '71.0.3544.2',
1466         '71.0.3544.1',
1467         '71.0.3544.0',
1468         '69.0.3497.86',
1469         '70.0.3538.10',
1470         '69.0.3497.85',
1471         '70.0.3538.9',
1472         '69.0.3497.84',
1473         '71.0.3543.4',
1474         '70.0.3538.8',
1475         '71.0.3543.3',
1476         '71.0.3543.2',
1477         '71.0.3543.1',
1478         '71.0.3543.0',
1479         '70.0.3538.7',
1480         '69.0.3497.83',
1481         '71.0.3542.2',
1482         '71.0.3542.1',
1483         '71.0.3542.0',
1484         '70.0.3538.6',
1485         '69.0.3497.82',
1486         '69.0.3497.81',
1487         '71.0.3541.1',
1488         '71.0.3541.0',
1489         '70.0.3538.5',
1490         '69.0.3497.80',
1491         '71.0.3540.1',
1492         '71.0.3540.0',
1493         '70.0.3538.4',
1494         '69.0.3497.79',
1495         '70.0.3538.3',
1496         '71.0.3539.1',
1497         '71.0.3539.0',
1498         '69.0.3497.78',
1499         '68.0.3440.134',
1500         '69.0.3497.77',
1501         '70.0.3538.2',
1502         '70.0.3538.1',
1503         '70.0.3538.0',
1504         '69.0.3497.76',
1505         '68.0.3440.133',
1506         '69.0.3497.75',
1507         '70.0.3537.2',
1508         '70.0.3537.1',
1509         '70.0.3537.0',
1510         '69.0.3497.74',
1511         '68.0.3440.132',
1512         '70.0.3536.0',
1513         '70.0.3535.5',
1514         '70.0.3535.4',
1515         '70.0.3535.3',
1516         '69.0.3497.73',
1517         '68.0.3440.131',
1518         '70.0.3532.8',
1519         '70.0.3532.7',
1520         '69.0.3497.72',
1521         '69.0.3497.71',
1522         '70.0.3535.2',
1523         '70.0.3535.1',
1524         '70.0.3535.0',
1525         '69.0.3497.70',
1526         '68.0.3440.130',
1527         '69.0.3497.69',
1528         '68.0.3440.129',
1529         '70.0.3534.4',
1530         '70.0.3534.3',
1531         '70.0.3534.2',
1532         '70.0.3534.1',
1533         '70.0.3534.0',
1534         '69.0.3497.68',
1535         '68.0.3440.128',
1536         '70.0.3533.2',
1537         '70.0.3533.1',
1538         '70.0.3533.0',
1539         '69.0.3497.67',
1540         '68.0.3440.127',
1541         '70.0.3532.6',
1542         '70.0.3532.5',
1543         '70.0.3532.4',
1544         '69.0.3497.66',
1545         '68.0.3440.126',
1546         '70.0.3532.3',
1547         '70.0.3532.2',
1548         '70.0.3532.1',
1549         '69.0.3497.60',
1550         '69.0.3497.65',
1551         '69.0.3497.64',
1552         '70.0.3532.0',
1553         '70.0.3531.0',
1554         '70.0.3530.4',
1555         '70.0.3530.3',
1556         '70.0.3530.2',
1557         '69.0.3497.58',
1558         '68.0.3440.125',
1559         '69.0.3497.57',
1560         '69.0.3497.56',
1561         '69.0.3497.55',
1562         '69.0.3497.54',
1563         '70.0.3530.1',
1564         '70.0.3530.0',
1565         '69.0.3497.53',
1566         '68.0.3440.124',
1567         '69.0.3497.52',
1568         '70.0.3529.3',
1569         '70.0.3529.2',
1570         '70.0.3529.1',
1571         '70.0.3529.0',
1572         '69.0.3497.51',
1573         '70.0.3528.4',
1574         '68.0.3440.123',
1575         '70.0.3528.3',
1576         '70.0.3528.2',
1577         '70.0.3528.1',
1578         '70.0.3528.0',
1579         '69.0.3497.50',
1580         '68.0.3440.122',
1581         '70.0.3527.1',
1582         '70.0.3527.0',
1583         '69.0.3497.49',
1584         '68.0.3440.121',
1585         '70.0.3526.1',
1586         '70.0.3526.0',
1587         '68.0.3440.120',
1588         '69.0.3497.48',
1589         '69.0.3497.47',
1590         '68.0.3440.119',
1591         '68.0.3440.118',
1592         '70.0.3525.5',
1593         '70.0.3525.4',
1594         '70.0.3525.3',
1595         '68.0.3440.117',
1596         '69.0.3497.46',
1597         '70.0.3525.2',
1598         '70.0.3525.1',
1599         '70.0.3525.0',
1600         '69.0.3497.45',
1601         '68.0.3440.116',
1602         '70.0.3524.4',
1603         '70.0.3524.3',
1604         '69.0.3497.44',
1605         '70.0.3524.2',
1606         '70.0.3524.1',
1607         '70.0.3524.0',
1608         '70.0.3523.2',
1609         '69.0.3497.43',
1610         '68.0.3440.115',
1611         '70.0.3505.9',
1612         '69.0.3497.42',
1613         '70.0.3505.8',
1614         '70.0.3523.1',
1615         '70.0.3523.0',
1616         '69.0.3497.41',
1617         '68.0.3440.114',
1618         '70.0.3505.7',
1619         '69.0.3497.40',
1620         '70.0.3522.1',
1621         '70.0.3522.0',
1622         '70.0.3521.2',
1623         '69.0.3497.39',
1624         '68.0.3440.113',
1625         '70.0.3505.6',
1626         '70.0.3521.1',
1627         '70.0.3521.0',
1628         '69.0.3497.38',
1629         '68.0.3440.112',
1630         '70.0.3520.1',
1631         '70.0.3520.0',
1632         '69.0.3497.37',
1633         '68.0.3440.111',
1634         '70.0.3519.3',
1635         '70.0.3519.2',
1636         '70.0.3519.1',
1637         '70.0.3519.0',
1638         '69.0.3497.36',
1639         '68.0.3440.110',
1640         '70.0.3518.1',
1641         '70.0.3518.0',
1642         '69.0.3497.35',
1643         '69.0.3497.34',
1644         '68.0.3440.109',
1645         '70.0.3517.1',
1646         '70.0.3517.0',
1647         '69.0.3497.33',
1648         '68.0.3440.108',
1649         '69.0.3497.32',
1650         '70.0.3516.3',
1651         '70.0.3516.2',
1652         '70.0.3516.1',
1653         '70.0.3516.0',
1654         '69.0.3497.31',
1655         '68.0.3440.107',
1656         '70.0.3515.4',
1657         '68.0.3440.106',
1658         '70.0.3515.3',
1659         '70.0.3515.2',
1660         '70.0.3515.1',
1661         '70.0.3515.0',
1662         '69.0.3497.30',
1663         '68.0.3440.105',
1664         '68.0.3440.104',
1665         '70.0.3514.2',
1666         '70.0.3514.1',
1667         '70.0.3514.0',
1668         '69.0.3497.29',
1669         '68.0.3440.103',
1670         '70.0.3513.1',
1671         '70.0.3513.0',
1672         '69.0.3497.28',
1673     )
1674     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# HTTP header name -> value mapping. Note that the User-Agent value is
# computed by random_user_agent() only once, when this module is imported
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1684
1685
# Fixed User-Agent strings, keyed by a short browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1689
1690
# Unique sentinel meaning "no default was passed". It is compared by identity
# (e.g. `default is not NO_DEFAULT` in the xpath_* helpers) so that None
# remains usable as a real default value
NO_DEFAULT = object()
1692
# Full English month names, January first
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Full month names per language code, each list in January..December order
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1703
# File extensions (without the leading dot) of media containers, audio/video
# formats and streaming manifests.
# NOTE(review): the consumers of this tuple are outside this part of the file
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1718
# needed for sanitizing filenames in restricted mode
# Maps each accented character of the first string to its ASCII replacement.
# The replacements are paired positionally; the list items (['AE'], ['OE'],
# ['TH', 'ss'], ...) are the replacements that are longer than one character
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1723
# strptime()-style date/time format strings that do not depend on whether the
# day or the month is written first. This tuple is the common base of
# DATE_FORMATS_DAY_FIRST and DATE_FORMATS_MONTH_FIRST
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)
1763
# DATE_FORMATS plus the purely numeric formats read as day before month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])
1773
# DATE_FORMATS plus the purely numeric formats read as month before day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
1782
# Matches the call tail `}('<payload>',<int>,<int>,'<words>'.split('|')` and
# captures its four arguments.
# NOTE(review): presumably the argument list of packed ("p,a,c,k,e,d")
# JavaScript - confirm against the code that uses it
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script> element whose type attribute is application/ld+json
# (optionally quoted, case-insensitive) and captures its body as `json_ld`
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1785
1786
def preferredencoding():
    """Get preferred encoding.

    Returns the encoding reported by locale.getpreferredencoding() if it
    can actually be used to encode text, and 'UTF-8' otherwise.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Make sure the reported codec really exists and works
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
1800
1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn, which
    is then renamed over fn. Objects json cannot serialize are dumped as
    their repr(). The temporary file is removed if anything fails.
    """

    fn = encodeFilename(fn)
    is_py2 = sys.version_info < (3, 0)

    if is_py2 and sys.platform != 'win32':
        fs_encoding = get_filesystem_encoding()

        # os.path.basename and os.path.dirname return bytes objects here,
        # but NamedTemporaryFile will fail if the filename contains non
        # ascii characters unless we use a unicode object
        def path_basename(f):
            return os.path.basename(fn).decode(fs_encoding)

        def path_dirname(f):
            return os.path.dirname(fn).decode(fs_encoding)
    else:
        path_basename, path_dirname = os.path.basename, os.path.dirname

    tmp_options = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }
    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if is_py2:
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_options))

    try:
        with tf:
            json.dump(obj, tf, default=repr)

        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass

        # Temporary files are created with restrictive permissions; apply
        # the permissions a regular new file would get under the umask
        try:
            current_umask = os.umask(0)
            os.umask(current_umask)
            os.chmod(tf.name, 0o666 & ~current_umask)
        except OSError:
            pass

        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            # Only require the attribute to be present
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering the matches manually """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (
                    val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1876
# On Python 2.6 the xml.etree.ElementTree.Element methods don't support
# the namespaces parameter
1879
1880
def xpath_with_ns(path, ns_map):
    """Expand the `prefix:tag` steps of path to `{namespace}tag`

    ns_map maps every prefix used in path to its namespace URI. Steps
    without a prefix are kept as they are.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
1891
1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node that matches xpath

    xpath is either a single xpath string or an iterable of xpaths that are
    tried in order until one of them matches.

    If nothing matches: default is returned when one was passed, otherwise
    ExtractorError is raised when fatal is set, otherwise None is returned.
    name replaces the xpath in the error message.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty collection of xpaths is treated
        # as "nothing found" instead of raising UnboundLocalError below
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # name may be a tuple of xpaths; wrap it so that it is formatted
            # as a single value
            raise ExtractorError('Could not find XML element %s' % (name,))
        else:
            return None
    return n
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the first element below node that matches xpath

    Arguments are the same as for xpath_element. An element without text is
    handled like a missing element: default is returned when one was
    passed, otherwise ExtractorError is raised when fatal is set, otherwise
    None is returned.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Nothing was found: pass None or the default through unchanged
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # name may be a tuple of xpaths; wrap it so that it is formatted
            # as a single value
            raise ExtractorError('Could not find XML element\'s text %s' % (name,))
        else:
            return None
    return n.text
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it

    If there is no such element: default is returned when one was passed,
    otherwise ExtractorError is raised when fatal is set, otherwise None is
    returned. name replaces `xpath[@key]` in the error message.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]

    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % label)
    return None
1942
1943
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # The ID is matched literally, i.e. it is regex-escaped
    return get_element_by_attribute('id', id, html, escape_value=True)
1947
1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if matches:
        return matches[0]
    return None
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute in the passed HTML document"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class may be one of several inside the attribute value, so match
    # it as a whole word surrounded by any non-quote characters
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is False, in which case
    it is used as a regular expression.
    """
    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for mobj in re.finditer(element_re, html):
        content = mobj.group('content')
        # Drop the first and the last character of quoted content
        if content.startswith(('"', "'")):
            content = content[1:-1]
        contents.append(unescapeHTML(content))
    return contents
1990
1991
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element

    After markup has been fed, `attrs` holds the attributes of the last
    start tag seen as a dict; it stays empty if there was no start tag.
    """

    def __init__(self):
        # Result holder, replaced by handle_starttag()
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs is passed as (name, value) pairs; every start tag overwrites
        # the previous result
        self.attrs = dict(attrs)
2001
2002
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was gathered up to that point is still returned
        pass
    return attr_parser.attrs
2027
2028
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Newlines in the markup are insignificant...
    text = html.replace('\n', ' ')
    substitutions = (
        # ... while <br /> and paragraph boundaries are real line breaks
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    # Replace html entities
    return unescapeHTML(text).strip()
2044
2045
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    The filename '-' stands for standard output. If opening any other
    filename fails for a reason other than missing permissions, the open
    is retried once with the name produced by sanitize_path(); an error of
    that second attempt is raised to the caller, like the standard open()
    function would do.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != '-':
            stream = open(encodeFilename(filename), open_mode)
            return (stream, filename)

        if sys.platform == 'win32':
            # Put stdout into binary mode
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        # Prefer the underlying binary buffer when stdout has one
        return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        stream = open(encodeFilename(alt_filename), open_mode)
        return (stream, alt_filename)
2076
2077
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None if the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2085
2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        codepoint = ord(char)
        # Question marks and control characters are dropped
        if char == '?' or codepoint < 32 or codepoint == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        # Restricted mode additionally replaces shell-unfriendly characters,
        # whitespace and everything outside of ASCII
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace()
                or codepoint > 127):
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    # Collapse runs of underscores and drop them from both ends
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    return result.lstrip('.') or '_'
2128
2129
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On Windows the path is always processed and force is ignored. On other
    platforms the path is returned unchanged unless force is set, in which
    case it is processed without any drive/UNC handling.

    Processing normalizes the path and, in every component except '.' and
    '..', replaces each of the characters / < > : " | ? * and backslash as
    well as a trailing whitespace character or dot with '#'.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    # There is nothing to strip when no drive/UNC prefix was detected
    path = remove_start(s, drive_or_unc) if drive_or_unc else s
    norm_path = os.path.normpath(path).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s.startswith(os.path.sep):
        # startswith() instead of s[0] so that an empty path does not raise
        # IndexError; keep absolute paths absolute
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2153
2154
def sanitize_url(url):
    """Repair a few common URL defects and return the corrected URL.

    Protocol-less URLs (starting with `//`) get the `http:` scheme prepended
    in order to mitigate the number of unwanted failures due to a missing
    protocol. A handful of known scheme typos are corrected as well. Any
    other URL is returned unchanged.
    """
    if url.startswith('//'):
        return 'http:%s' % url

    # (pattern, replacement) pairs for typos seen in the wild
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for pattern, replacement in typo_fixes:
        corrected, hits = re.subn(pattern, replacement, url)
        if hits:
            return corrected
    return url
2171
2172
def extract_basic_auth(url):
    """Split user credentials embedded in a URL off into a Basic auth header.

    @param url  The URL, possibly of the form scheme://user:password@host/...
    @returns    A tuple (url, auth_header). When the URL carries no username
                it is returned unchanged together with None; otherwise the
                URL is rebuilt without the credentials and auth_header is
                the value for an `Authorization` header ('Basic <base64>').
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None

    host = parts.hostname
    if host is not None and ':' in host:
        # `hostname` strips the brackets around IPv6 literals; they have to
        # be restored or the rebuilt netloc is ambiguous/invalid
        host = '[%s]' % host
    netloc = host if parts.port is None else '%s:%d' % (host, parts.port)
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))

    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
2183
2184
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from a sanitized, escaped URL.

    The URL is passed through sanitize_url() and escape_url(); credentials
    embedded in it are removed by extract_basic_auth() and sent as an
    `Authorization` header instead. All remaining arguments are forwarded
    to the Request constructor unchanged.
    """
    cleaned_url = escape_url(sanitize_url(url))
    url, auth_header = extract_basic_auth(cleaned_url)
    if auth_header is not None:
        # Headers are the second positional argument after the URL (the first
        # one being the request data); otherwise use/create the keyword
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2191
2192
def expand_path(s):
    """Expand a leading ~ and then any shell variables in the path"""
    with_home_expanded = compat_expanduser(s)
    return os.path.expandvars(with_home_expanded)
2196
2197
def orderedSet(iterable):
    """ Return a list of the items of iterable without duplicates, keeping
    the order of first occurrence.

    Items are compared by equality only, so unhashable items are supported.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
2205
2206
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    @param entity_with_semicolon  The entity without the leading '&' but
                                  including the trailing ';' (e.g. 'amp;')
    @returns  The decoded character(s), or the literal '&...;' representation
              when the entity is unknown or its number is not a valid
              code point
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        # chr() raises ValueError for numbers beyond the Unicode range and
        # OverflowError for numbers that do not even fit a C int; both mean
        # the reference is bogus and must be kept literally
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2236
2237
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for.

    None is passed through. Every '&...;' sequence is handed to
    _htmlentity_transform(), which leaves unknown entities as they are.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', decode_entity, s)
2245
2246
def process_communicate_or_kill(p, *args, **kwargs):
    """Call p.communicate() and make sure p does not outlive a failure.

    All arguments are forwarded to p.communicate() and its result is
    returned. If it raises anything (including KeyboardInterrupt), the
    process is killed and waited for before the exception propagates.
    """
    finished = False
    try:
        output = p.communicate(*args, **kwargs)
        finished = True
        return output
    finally:
        if not finished:
            p.kill()
            p.wait()
2254
2255
def get_subprocess_encoding():
    """Return the name of the encoding to use for subprocess arguments.

    Falls back to 'utf-8' when the platform does not report an encoding.
    """
    # For subprocess calls on Windows, encode with locale encoding
    # Refer to http://stackoverflow.com/a/9951851/35070
    use_locale_encoding = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    encoding = preferredencoding() if use_locale_encoding else sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2266
2267
def encodeFilename(s, for_subprocess=False):
    """
    Convert a filename to the form expected by the filesystem/subprocess API.

    @param s               The name of the file
    @param for_subprocess  The name is going to be passed to a subprocess
    @returns  s itself wherever a Unicode API is available, otherwise s
              encoded with the subprocess encoding (unencodable characters
              are dropped)
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Unicode APIs are used directly on Windows 2000 and up, except for
    # subprocesses. (Detecting Windows NT 4 is tricky because 'major >= 4'
    # would match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if unicode_windows_api or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2290
2291
def decodeFilename(b, for_subprocess=False):
    """Counterpart of encodeFilename().

    On Python 3, and for anything that is not a byte string, b is returned
    as is; otherwise it is decoded with the subprocess encoding, dropping
    undecodable bytes.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2301
2302
def encodeArgument(s):
    """Encode a command line argument the way encodeFilename() does for
    subprocesses. Byte strings are accepted and decoded as ASCII first."""
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        text = s.decode('ascii')
    return encodeFilename(text, True)
2310
2311
def decodeArgument(b):
    """Decode a command line argument; decodeFilename() in subprocess mode"""
    return decodeFilename(b, for_subprocess=True)
2314
2315
def decodeOption(optval):
    """Return an option value as text.

    None is passed through and byte strings are decoded with the preferred
    encoding. Anything that is not text afterwards fails an assertion.
    """
    if optval is None:
        return optval
    text = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(text, compat_str)
    return text
2324
2325
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as [H:]MM:SS-style text.

    @param secs   The (non-negative) number of seconds
    @param delim  The separator to put between the fields
    @returns  'H<delim>MM<delim>SS' from one hour on, 'M<delim>SS' from one
              minute on and the plain number of seconds below that
    """
    # The bounds are inclusive so that exactly one hour/minute is rendered
    # as '1:00:00'/'1:00' rather than '60:00'/'60'
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2333
2334
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler with a suitable TLS configuration.

    @param params  The options dictionary; certificate verification is
                   turned off when its 'nocheckcertificate' entry is true
    @param kwargs  Passed on to the YoutubeDLHTTPSHandler constructor
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=ctx, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    ctx.verify_mode = ssl.CERT_NONE if skip_verification else ssl.CERT_REQUIRED
    ctx.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=ctx, **kwargs)
2358
2359
def bug_reports_message(before=';'):
    """Return the standard request to report a bug, ready to be appended to
    an error message.

    @param before  The text (usually punctuation) that precedes the request;
                   if it is blank or ends a sentence, the request starts
                   with a capital letter
    """
    update_cmd = (
        'type  yt-dlp -U  to update' if ytdl_is_updateable()
        else 'see  https://github.com/yt-dlp/yt-dlp  on how to update')
    msg = ''.join((
        'please report this issue on  https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    ))

    before = before.rstrip()
    starts_sentence = not before or before.endswith(('.', '!', '?'))
    if starts_sentence:
        msg = msg[0].title() + msg[1:]

    prefix = before + ' ' if before else ''
    return prefix + msg
2374
2375
class YoutubeDLError(Exception):
    """Common base class of the YoutubeDL exceptions."""
2379
2380
# Exception types that indicate a network problem (see ExtractorError)
network_exceptions = (
    compat_urllib_error.URLError,
    compat_http_client.HTTPException,
    socket.error,
)
if hasattr(ssl, 'CertificateError'):
    network_exceptions += (ssl.CertificateError,)
2385
2386
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ Compose the final message and remember the error context.

        tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        cause, if given, is mentioned in the message and video_id, if given,
        is put in front of it.
        """

        # An error raised while a network exception is being handled is
        # always considered to be expected
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in network_exceptions:
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback as text, or None if there is none."""
        tb = self.traceback
        return None if tb is None else ''.join(traceback.format_tb(tb))
2414
2415
class UnsupportedError(ExtractorError):
    """Expected error for a URL that is not supported; the URL is kept in
    the `url` attribute."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2421
2422
class RegexNotFoundError(ExtractorError):
    """Extraction error for a regular expression that did not match"""
2426
2427
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    May be thrown when a website does not make a video available in your
    geographic location. The error is always flagged as expected; the
    message is kept in `msg` and the countries passed in (if any) in
    `countries`.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg, self.countries = msg, countries
2439
2440
class DownloadError(YoutubeDLError):
    """Download Error exception.

    FileDownloader objects may throw this exception when they are not
    configured to continue on errors. It carries the appropriate error
    message.
    """

    def __init__(self, msg, exc_info=None):
        """ Store the message along with exc_info, which, if given, is the
        original exception that caused the trouble (as returned by
        sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2453
2454
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    Thrown by YoutubeDL when a requested entry cannot be found in the
    playlist info_dict.
    """
2462
2463
class SameFileError(YoutubeDLError):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
2471
2472
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this exception to indicate an
    error in the postprocessing task. The message is also available as the
    `msg` attribute.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the plain message accessible without going through args
        self.msg = msg
2483
2484
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached. """
    # NOTE(review): presumably raised for --break-on-existing (not for
    # --max-downloads, which is MaxDownloadsReached); confirm at the raise site
    pass
2488
2489
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached. """
    # NOTE(review): presumably raised for --break-on-reject (not for
    # --max-downloads, which is MaxDownloadsReached); confirm at the raise site
    pass
2493
2494
class MaxDownloadsReached(YoutubeDLError):
    """ Signals that the --max-downloads limit has been reached. """
2498
2499
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available for
    that video.
    """
2507
2508
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    FileDownloader objects may raise this exception when a downloaded file
    is smaller than what the server announced first, which indicates that
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded, self.expected = downloaded, expected
2524
2525
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an errno-style code and a message, classified into a
    `reason` of 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'."""

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg
        self.reason = self._reason_for(code, msg)

    @staticmethod
    def _reason_for(code, msg):
        """Derive the reason from the error code or, failing that, from
        well-known phrases in the message."""
        if (code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in msg or 'Disk quota exceeded' in msg):
            return 'NO_SPACE'
        if code == errno.E2BIG or 'Argument list too long' in msg:
            return 'VALUE_TOO_LONG'
        return 'NOT_SUPPORTED'
2540
2541
class XAttrUnavailableError(YoutubeDLError):
    """YoutubeDL error without any extra state."""
    # NOTE(review): going by the name this signals that xattr support is
    # unavailable; confirm at the raise sites
2544
2545
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and make it honour the 'source_address' option.

    @param ydl_handler  The handler whose `_params` hold the options
    @param http_class   The connection class to instantiate with args/kwargs
    @param is_https     Whether the Python 2.6 fallback has to wrap the
                        socket with TLS
    @returns  The connection object. If the 'source_address' option is set,
              its sockets are bound to that address and only remote
              addresses of the same IP family are tried.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        # Note that the `source_address` parameter shadows the outer variable:
        # in here it is the (address, port) tuple handed over by the caller.
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address means IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try the candidates in order; the first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and go on with the next candidate
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 is passed together with the address to bind()
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: replace connect() altogether
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2609
2610
def handle_youtubedl_headers(headers):
    """Apply the internal "Youtubedl-no-compression" marker header.

    If the marker is present, a copy of the headers is returned that lacks
    both the marker and any Accept-Encoding header (whatever its case).
    Otherwise the headers object itself is returned.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    kept = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del kept['Youtubedl-no-compression']
    return kept
2619
2620
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Keep params (the options dictionary read by
        _create_http_connection) and pass everything else to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req over plain HTTP, through a SOCKS proxy if the request
        carries the internal "Ytdl-socks-proxy" header."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            # The header is internal only and must not be sent to the server
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream (no zlib header)
        first and a regular zlib stream second. Empty data is returned as is."""
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Prepare an outgoing request: percent-encode its URL, add the
        missing standard headers and process the internal headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Post-process a response: transparently decompress gzip/deflate
        bodies and percent-encode the Location header of redirects."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with up to 1023 trailing bytes cut off, one more on
                # each attempt
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # None of the truncated variants could be read
                    raise original_ioerror
            # Wrap the decompressed body in a response that looks like the old one
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS requests and responses get exactly the same treatment
    https_request = http_request
    https_response = http_response
2744
2745
def make_socks_conn_class(base_class, socks_proxy):
    """Create a subclass of base_class that connects through a SOCKS proxy.

    @param base_class   HTTPConnection, HTTPSConnection or a subclass
    @param socks_proxy  The proxy URL; the scheme must be one of socks,
                        socks4, socks4a and socks5, the port defaults to
                        1080 and username/password are optional
    @returns  The connection class
    @raises ValueError  if the scheme of the proxy URL is not supported
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    socks_types = {
        'socks5': ProxyType.SOCKS5,
        'socks': ProxyType.SOCKS4,
        'socks4': ProxyType.SOCKS4,
        'socks4a': ProxyType.SOCKS4A,
    }
    if scheme not in socks_types:
        # Fail with a meaningful error instead of an UnboundLocalError below
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % scheme)
    socks_type = socks_types[scheme]

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS the TLS layer goes on top of the proxied socket
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2787
2788
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that supports SOCKS proxies and the 'source_address'
    option by creating its connections with _create_http_connection()."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        """Keep params (the options dictionary) and the connection class to
        use (HTTPSConnection by default); the remaining arguments go to
        HTTPSHandler."""
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open req over HTTPS, through a SOCKS proxy if the request carries
        the internal "Ytdl-socks-proxy" header."""
        # Hand over the TLS settings the base handler stored, where it did
        # ('_context': python > 2.6, '_check_hostname': python 3.x)
        tls_options = {}
        for option, attribute in (('context', '_context'), ('check_hostname', '_check_hostname')):
            if hasattr(self, attribute):
                tls_options[option] = getattr(self, attribute)

        conn_class = self._https_conn_class
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **tls_options)
2812
2813
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar that reads and writes UTF-8 cookie files, accepts
    '#HttpOnly_' prefixed entries and stores session cookies with `expires`
    set to 0.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking an HttpOnly cookie entry; stripped while loading
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab separated fields of a cookie entry
    _ENTRY_LEN = 7
    # Written at the top of every saved cookie file
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    # The fields of a cookie entry, in file order
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        filename defaults to self.filename; ValueError is raised if neither
        is set. Cookies marked to be discarded and expired cookies are
        skipped unless ignore_discard/ignore_expires is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        # (this modifies the cookies held by the jar, not just the output)
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised if neither
        is set. Malformed entries are skipped with a warning on stderr.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line without its HttpOnly prefix, raises LoadError
            # for entries that are obviously malformed
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # The accepted lines are collected in memory and then parsed by the
        # stock MozillaCookieJar loader
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2930
2931
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that handles https the same way as http"""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 chokes on the next HTTP request in a row if the Set-Cookie
        # header of the last response contains non-ASCII characters (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # A workaround that percent-encoded the Set-Cookie/Set-Cookie2 headers
        # before HTTPCookieProcessor processed them used to live here; it is
        # disabled, so the response is passed to the base class untouched.
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the plain-HTTP handlers for HTTPS traffic
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2954
2955
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler used by YoutubeDL

    Derived from the HTTPRedirectHandler implementation in CPython [1], with
    two differences:
     - the redirect URL is always unicode under python 2
     - the experimental HTTP response status code 308 Permanent Redirect [2],
       which is used by some sites [3], is supported

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # All supported redirect codes share the 302 implementation of the base class
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        The http_error_30x methods call this when a redirection response
        is received. A new Request is returned when the redirect should be
        followed, so that http_error_30x can perform it. HTTPError is raised
        when no other handler should try to handle this url. Returning None
        means this handler cannot redirect but another one might.
        """
        method = req.get_method()
        idempotent_redirect = code in (301, 302, 303, 307, 308) and method in ("GET", "HEAD")
        post_redirect = code in (301, 302, 303) and method == "POST"
        if not (idempotent_redirect or post_redirect):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return the redirect URL
        # as a byte string instead of unicode; force unicode here.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Headers describing the original request body are not carried over
        body_headers = ("content-length", "content-type")
        newheaders = {}
        for header_name, header_value in req.headers.items():
            if header_name.lower() not in body_headers:
                newheaders[header_name] = header_value
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3011
3012
def extract_timezone(date_str):
    """
    Split a trailing "Z" or numeric UTC offset (+HH:MM, -HHMM, ...) off date_str

    Returns a (timezone, date_str) tuple where timezone is a
    datetime.timedelta (zero when nothing was recognized or for "Z") and
    date_str is the input without the recognized suffix.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if tz_match is None:
        return datetime.timedelta(), date_str

    stripped = date_str[:-len(tz_match.group('tz'))]
    sign = tz_match.group('sign')
    if not sign:
        # Plain "Z": UTC
        return datetime.timedelta(), stripped

    factor = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(tz_match.group('hours')),
        minutes=factor * int(tz_match.group('minutes')))
    return offset, stripped
3029
3030
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """
    Return a UNIX timestamp from the given date

    date_str must look like YYYY-MM-DD<delimiter>HH:MM:SS; fractional seconds
    are ignored. When timezone (a datetime.timedelta) is None, it is taken
    from the end of date_str. Returns None if date_str is None or does not
    match the expected format.
    """
    if date_str is None:
        return None

    # The format below has no directive for fractional seconds, so drop them
    without_fraction = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        utc_time = datetime.datetime.strptime(without_fraction, date_format) - timezone
        return calendar.timegm(utc_time.timetuple())
    except ValueError:
        return None
3048
3049
def date_formats(day_first=True):
    """ Return the date format expressions to try, preferring day-first or month-first ones """
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3052
3053
def unified_strdate(date_str, day_first=True):
    """
    Return a string with the date in the format YYYYMMDD

    Returns None if date_str is None or cannot be parsed. day_first selects
    which set of date formats is tried.
    """
    if date_str is None:
        return None

    # Commas become spaces; AM/PM markers (with a following timezone
    # abbreviation) and a trailing numeric UTC offset are dropped
    cleaned = date_str.replace(',', ' ')
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every format is tried; when several match, the last one wins
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, expression).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Fall back to RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
3080
3081
def unified_timestamp(date_str, day_first=True):
    """
    Return a UNIX timestamp parsed from a free-form date string

    Returns None if date_str is None or cannot be parsed. day_first selects
    which set of date formats is tried. A trailing numeric UTC offset is
    honoured; textual timezone abbreviations are discarded.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # The formats tried below carry no AM/PM directive, so PM is applied manually
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    # Fall back to RFC 2822 style dates
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The numeric UTC offset has already been stripped from date_str by
        # extract_timezone(), so parsedate_tz() cannot see it and it has to
        # be applied here, exactly as in the strptime branch above
        tz_seconds = timezone.days * 86400 + timezone.seconds
        return calendar.timegm(timetuple) + pm_delta * 3600 - tz_seconds
3113
3114
def determine_ext(url, default_ext='unknown_video'):
    """
    Guess the file extension from a URL

    The query string is ignored. default_ext is returned when url is None,
    contains no dot, or the guess is neither purely alphanumeric nor one of
    KNOWN_EXTENSIONS.
    """
    if url is None or '.' not in url:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3126
3127
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """ Return the name of the subtitles file, using "<sub_lang>.<sub_format>" as the new extension of filename """
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
3130
3131
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    round_to_unit = precision == 'auto'
    if round_to_unit:
        precision = 'microsecond'

    now = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return now
    if date_str == 'yesterday':
        return now - datetime.timedelta(days=1)

    offset_match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if offset_match is None:
        # A plain date in the given format
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    # Resolve the part before the offset first, then shift it
    base = datetime_from_str(offset_match.group('start'), precision, format)
    amount = int(offset_match.group('time'))
    if offset_match.group('sign') == '-':
        amount = -amount
    unit = offset_match.group('unit')

    if unit in ('month', 'year'):
        # timedelta has no months or years; shift by calendar months instead
        months = amount * 12 if unit == 'year' else amount
        shifted = datetime_add_months(base, months)
        unit = 'day'
    else:
        if unit == 'week':
            unit, amount = 'day', amount * 7
        shifted = base + datetime.timedelta(**{unit + 's': amount})

    if round_to_unit:
        return datetime_round(shifted, unit)
    return shifted
3172
3173
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to parse a plain date
    """
    parsed = datetime_from_str(date_str, precision='microsecond', format=format)
    return parsed.date()
3182
3183
def datetime_add_months(dt, months):
    """
    Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the resulting month.
    """
    # Work with a zero-based month index so that divmod yields the year carry
    year_shift, month_index = divmod(dt.month + months - 1, 12)
    target_year = dt.year + year_shift
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return dt.replace(target_year, target_month, min(dt.day, last_day))
3191
3192
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision

    precision: microsecond|second|minute|hour|day.
               'microsecond' returns dt unchanged; any other value yields a
               new naive datetime rounded half up to that unit. Unknown
               values raise KeyError.
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    step = unit_seconds[precision]
    timestamp = calendar.timegm(dt.timetuple())
    # Integer arithmetic only: add half a unit, then floor to a multiple of it
    rounded = (timestamp + step // 2) // step * step
    # Epoch arithmetic instead of the deprecated utcfromtimestamp(), which is
    # also platform-limited for timestamps before 1970
    return datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=rounded)
3209
3210
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that do not consist of exactly eight digits are returned unchanged.
    """
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
3219
3220
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """
        start and end must be strings in the format accepted by date_from_str

        A missing start or end leaves the range open on that side (the
        smallest or largest representable date is used). ValueError is raised
        if start is after end.
        """
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """
        Check if the date is in the range

        date may be a datetime.date, a datetime.datetime (only its calendar
        date is considered) or a string accepted by date_from_str.
        """
        if isinstance(date, datetime.datetime):
            # datetime is a subclass of date, but the two cannot be ordered
            # against each other, so compare on the calendar date only
            date = date.date()
        elif not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3250
3251
def platform_name():
    """ Returns the platform name (as reported by platform.platform()) as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3260
3261
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The string is written with the Win32 WriteConsoleW function when out is
    stdout or stderr (file descriptor 1 or 2) and is attached to a console.
    OSError is raised if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps the file descriptor to the argument passed to GetStdHandle
    # (-11/-12 are the STD_OUTPUT_HANDLE/STD_ERROR_HANDLE values)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        # Neither stdout nor stderr
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, handles that are not character devices
        # (ignoring the "remote" flag) and handles GetConsoleMode fails on
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, stopping before each
    # non-BMP character; such a character is then written on its own
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, for which a
        # length of 2 is passed
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever was actually written
            s = s[written.value:]
    return True
3335
3336
def write_string(s, out=None, encoding=None):
    """
    Write the text s to out (sys.stderr by default) and flush it

    Streams that expect bytes receive s encoded with encoding (or a fallback
    encoding); characters that cannot be encoded are dropped. On Windows,
    the console is written to directly if possible when no encoding is given.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    on_windows = sys.platform == 'win32'
    if on_windows and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves
        target_encoding = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(target_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()
3357
3358
def bytes_to_intlist(bs):
    """ Convert a byte string to a list of integer byte values; empty input gives [] """
    if not bs:
        return []
    first = bs[0]
    if not isinstance(first, int):
        # Items are single characters rather than ints (as on Python 2)
        return [ord(char) for char in bs]
    return list(bs)  # Python 3
3366
3367
def intlist_to_bytes(xs):
    """ Pack a list of integer byte values into a byte string; empty input gives b'' """
    if xs:
        # One unsigned byte ("B") per item
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
3372
3373
3374 # Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on Windows, fcntl.flock where fcntl is
# available, and stubs that raise IOError everywhere else
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # ctypes layout of the structure passed as the last argument of
    # LockFileEx/UnlockFileEx
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToUnlockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToUnlockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high parts of the byte count to lock, starting at offset 0;
    # large enough to cover the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the file object f; raises OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file() can pass the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock, 0x0 a shared one
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file(); raises OSError if UnlockFileEx fails."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock() lock on f."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock() lock held on f."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            """Always raises IOError: locking is unavailable without fcntl."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always raises IOError: locking is unavailable without fcntl."""
            raise IOError(UNSUPPORTED_MSG)
3447
3448
class locked_file(object):
    """
    Context manager around a file that is locked while the block runs

    The file is opened on construction. Entering takes a shared lock for
    mode 'r' and an exclusive one for 'a' and 'w'; leaving releases the lock
    and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        needs_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, needs_exclusive)
        except IOError:
            # Do not leave the file open if it could not be locked
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # The file is closed even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3478
3479
def get_filesystem_encoding():
    """ Return the filesystem encoding reported by sys, or 'utf-8' if it reports None """
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3483
3484
def shell_quote(args):
    """ Return the arguments as a single space-separated string, each one quoted for the shell """
    fs_encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        if isinstance(arg, bytes):
            return arg.decode(fs_encoding)
        return arg

    return ' '.join([compat_shlex_quote(as_text(arg)) for arg in args])
3494
3495
def smuggle_url(url, data):
    """
    Pass additional data in a URL for internal use.

    data is JSON-encoded into the URL fragment. If url already carries
    smuggled data, it is merged in and wins over equal keys of data.
    Note that data itself is updated in place.
    """
    plain_url, existing = unsmuggle_url(url, {})
    data.update(existing)
    payload = json.dumps(data)
    fragment = compat_urllib_parse_urlencode({'__youtubedl_smuggle': payload})
    return plain_url + '#' + fragment
3504
3505
def unsmuggle_url(smug_url, default=None):
    """
    Split the data added by smuggle_url off a URL

    Returns a (url, data) tuple; for URLs without smuggled data that is the
    unchanged URL and default.
    """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, fragment = smug_url.rpartition('#')
        payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return url, json.loads(payload)
    return smug_url, default
3513
3514
def format_bytes(bytes):
    """
    Format a number of bytes with a binary (1024-based) unit suffix

    bytes may be a number or a numeric string. Returns 'N/A' for None and for
    negative values, otherwise a string with two decimals such as '1.50KiB'.
    Sizes beyond the largest known unit are expressed in that unit.
    """
    if bytes is None:
        return 'N/A'
    size = float(bytes)
    if size < 0:
        return 'N/A'

    suffixes = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    exponent = 0
    # Repeated division by 1024 is exact for floats, unlike math.log() which
    # can land just below an integer for exact powers; it also keeps the
    # exponent within the suffix table for tiny and huge values
    while size >= 1024.0 and exponent < len(suffixes) - 1:
        size /= 1024.0
        exponent += 1
    return '%.2f%s' % (size, suffixes[exponent])
3527
3528
def lookup_unit_table(unit_table, s):
    """
    Parse "<number><unit>" at the start of s using the multipliers in unit_table

    The number may use either ',' or '.' as decimal separator. Returns the
    number multiplied by the unit's multiplier, truncated to an int, or None
    if s does not start with a number followed by a known unit.
    """
    escaped_units = [re.escape(unit) for unit in unit_table]
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % '|'.join(escaped_units)
    found = re.match(pattern, s)
    if found is None:
        return None
    number = float(found.group('num').replace(',', '.'))
    multiplier = unit_table[found.group('unit')]
    return int(number * multiplier)
3538
3539
def parse_filesize(s):
    """
    Parse a human-readable file size such as "1.5 MiB" into a number of bytes

    Returns None if s is None or does not start with a number followed by a
    known unit.
    """
    if s is None:
        return None

    # Per prefix: upper-case symbol, lower-case symbol, decimal and binary name
    prefixes = (
        ('K', 'k', 'kilo', 'kibi'),
        ('M', 'm', 'mega', 'mebi'),
        ('G', 'g', 'giga', 'gibi'),
        ('T', 't', 'tera', 'tebi'),
        ('P', 'p', 'peta', 'pebi'),
        ('E', 'e', 'exa', 'exbi'),
        ('Z', 'z', 'zetta', 'zebi'),
        ('Y', 'y', 'yotta', 'yobi'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_table = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    for power, (upper, lower, decimal_name, binary_name) in enumerate(prefixes, 1):
        binary = 1024 ** power
        decimal = 1000 ** power
        unit_table[upper + 'iB'] = binary
        unit_table[upper + 'B'] = decimal
        unit_table[lower + 'B'] = binary
        unit_table[upper + 'b'] = decimal
        unit_table[lower + 'b'] = decimal
        unit_table[decimal_name + 'bytes'] = decimal
        unit_table[binary_name + 'bytes'] = binary

    return lookup_unit_table(unit_table, s)
3609
3610
def parse_count(s):
    """
    Parse a count such as "1,234" or "1.2M" into an int

    Returns None if s is None or cannot be parsed.
    """
    if s is None:
        return None

    text = s.strip()

    # Digits with thousands separators only
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, text)
3630
3631
def parse_resolution(s):
    """
    Extract the resolution from a string

    Returns a dict with 'width' and 'height' for "<w>x<h>", or with only
    'height' for forms like "720p", "1080i", "4k" and "8K". An empty dict is
    returned if s is None or nothing is recognized.
    """
    if s is None:
        return {}

    size = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if size:
        width, height = size.group('w', 'h')
        return {'width': int(width), 'height': int(height)}

    scan_lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    # "4k"/"8k": 4 * 540 = 2160 and 8 * 540 = 4320 lines
    k_label = re.search(r'\b([48])[kK]\b', s)
    if k_label:
        return {'height': int(k_label.group(1)) * 540}

    return {}
3652
3653
def parse_bitrate(s):
    """ Return the number preceding "kbps" in s as an int; None if s is not a string or has no such number """
    if isinstance(s, compat_str):
        kbps = re.search(r'\b(\d+)\s*kbps', s)
        if kbps:
            return int(kbps.group(1))
    return None
3660
3661
def month_by_name(name, lang='en'):
    """
    Return the number of a month by its (locale-independent) name

    The month names of lang are used, falling back to the English ones if
    lang is not known. Returns None for unknown names.
    """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        position = names.index(name)
    except ValueError:
        return None
    return position + 1
3671
3672
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations (the first three letters of the name); None if unknown """
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    try:
        position = abbreviations.index(abbrev)
    except ValueError:
        return None
    return position + 1
3681
3682
def fix_xml_ampersands(xml_str):
    """
    Replace all the '&' by '&amp;' in XML

    Ampersands that already start one of the predefined entities or a
    numeric character reference are left alone.
    """
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
3689
3690
def setproctitle(title):
    """
    Set the name of the current process to title (best effort)

    This is done through prctl() from libc.so.6; nothing happens where that
    is unavailable (Jython, platforms without libc.so.6, libc without prctl).
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # One extra byte keeps the buffer NUL-terminated: prctl() expects a C
    # string and would otherwise read past the end of short titles
    buf = ctypes.create_string_buffer(len(title_bytes) + 1)
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3715
3716
def remove_start(s, start):
    """ Return s without the prefix start; s is returned unchanged if it is None or has no such prefix """
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3719
3720
def remove_end(s, end):
    """ Return s without the suffix end; s is returned unchanged if it is None, end is empty or s has no such suffix """
    # An empty suffix must be special-cased: s[:-0] would be the empty string
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
3723
3724
def remove_quotes(s):
    """ Strip one pair of matching single or double quotes surrounding s, if present """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3732
3733
def get_domain(url):
    """ Return the host part of url without scheme and leading "www."; None if url contains no dotted host """
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
3737
3738
def url_basename(url):
    """ Return the last segment of the path of url, ignoring leading and trailing slashes """
    url_path = compat_urlparse.urlparse(url).path
    return url_path.strip('/').rpartition('/')[2]
3742
3743
def base_url(url):
    """
    Return url up to and including the last '/' before any query or fragment

    Only http(s) URLs are supported; AttributeError is raised for anything else.
    """
    prefix = re.match(r'https?://[^?#&]+/', url)
    return prefix.group()
3746
3747
def urljoin(base, path):
    """
    Join base and path into an absolute URL

    Byte strings are decoded as UTF-8. path is returned as is if it is
    already absolute or protocol-relative. None is returned if path is empty
    or not a string, or if base is not an http(s) or protocol-relative URL.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3761
3762
class HEADRequest(compat_urllib_request.Request):
    """Request that always reports HEAD as its HTTP method"""

    def get_method(self):
        return 'HEAD'
3766
3767
class PUTRequest(compat_urllib_request.Request):
    """Request that always reports PUT as its HTTP method"""

    def get_method(self):
        return 'PUT'
3771
3772
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """
    Convert v to an int, returning default if that is not possible

    get_attr: if set, the attribute of v with this name is converted instead
    scale, invscale: the result is int(v) * invscale // scale
    default is returned for None, the empty string and values that int()
    rejects, including infinite floats.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float
        return default
3785
3786
def str_or_none(v, default=None):
    """ Return v converted with compat_str, or default when v is None """
    if v is None:
        return default
    return compat_str(v)
3789
3790
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned unchanged; strings have every ",", "." and "+"
    removed before being handed to int_or_none; anything else gives None """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if not isinstance(int_str, compat_str):
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)
3798
3799
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Return float(v) * invscale / scale, or default when v is None
    or cannot be converted to a float """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
3807
3808
def bool_or_none(v, default=None):
    """ Return v when it is a real bool, default otherwise """
    if isinstance(v, bool):
        return v
    return default
3811
3812
def strip_or_none(v, default=None):
    """ Return v with surrounding whitespace stripped when it is a string,
    default otherwise """
    if isinstance(v, compat_str):
        return v.strip()
    return default
3815
3816
def url_or_none(url):
    """ Return url stripped of surrounding whitespace when it is a string
    that starts with "//", optionally preceded by one of the accepted
    schemes (http, rtmp/rtsp variants, mms, ftp); None otherwise """
    if not (url and isinstance(url, compat_str)):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3822
3823
def strftime_or_none(timestamp, date_format, default=None):
    """ Format timestamp with date_format, or return default on failure.

    timestamp:   a number (taken as a unix timestamp, in UTC) or a string
                 (taken as YYYYMMDD); any other type yields default
    date_format: strftime format string
    default:     returned when timestamp cannot be parsed or formatted
    """
    datetime_object = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        # datetime_object is still None for unsupported types; the
        # resulting AttributeError is turned into default below
        return datetime_object.strftime(date_format)
    # OverflowError/OSError: timestamp outside the range the platform
    # can convert
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        return default
3834
3835
3836 def parse_duration(s):
3837     if not isinstance(s, compat_basestring):
3838         return None
3839
3840     s = s.strip()
3841
3842     days, hours, mins, secs, ms = [None] * 5
3843     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
3844     if m:
3845         days, hours, mins, secs, ms = m.groups()
3846     else:
3847         m = re.match(
3848             r'''(?ix)(?:P?
3849                 (?:
3850                     [0-9]+\s*y(?:ears?)?\s*
3851                 )?
3852                 (?:
3853                     [0-9]+\s*m(?:onths?)?\s*
3854                 )?
3855                 (?:
3856                     [0-9]+\s*w(?:eeks?)?\s*
3857                 )?
3858                 (?:
3859                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3860                 )?
3861                 T)?
3862                 (?:
3863                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3864                 )?
3865                 (?:
3866                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3867                 )?
3868                 (?:
3869                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3870                 )?Z?$''', s)
3871         if m:
3872             days, hours, mins, secs, ms = m.groups()
3873         else:
3874             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
3875             if m:
3876                 hours, mins = m.groups()
3877             else:
3878                 return None
3879
3880     duration = 0
3881     if secs:
3882         duration += float(secs)
3883     if mins:
3884         duration += float(mins) * 60
3885     if hours:
3886         duration += float(hours) * 60 * 60
3887     if days:
3888         duration += float(days) * 24 * 60 * 60
3889     if ms:
3890         duration += float(ms)
3891     return duration
3892
3893
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext in front of the extension of filename.

    When expected_real_ext is given and the actual extension differs
    from it, ext is appended to the whole filename instead """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3900
3901
def replace_extension(filename, ext, expected_real_ext=None):
    """ Swap the extension of filename for ext.

    When expected_real_ext is given and the actual extension differs
    from it, ext is appended to the whole filename instead """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
3907
3908
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        proc = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        # The binary could not be started
        return False
    else:
        return exe
3918
3919
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    command = [encodeArgument(exe)] + args
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    # stderr is merged into stdout above, so the version string is found
    # whichever stream the tool prints it on
    return detect_exe_version(out, version_re, unrecognized)
3937
3938
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Extract a version string from the output of an executable.

    version_re is searched in output and its first group is returned;
    it defaults to a pattern matching "version <something>".
    unrecognized is returned when nothing matches """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    mobj = re.search(pattern, output)
    return mobj.group(1) if mobj else unrecognized
3948
3949
import collections.abc  # the ABC aliases in "collections" were removed in Python 3.10


class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList

    Items are pulled from the iterable only when they are needed and are
    cached afterwards. Anything that needs the end of the sequence
    (negative indices, open-ended slices, len(), repr(), reversed
    access) consumes the whole iterable, so such operations must not be
    used with infinite iterables.'''

    def __init__(self, iterable):
        self.__iterable = iter(iterable)
        self.__cache = []
        self.__reversed = False

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()[::-1]
            return
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        self.__cache.extend(self.__iterable)
        return self.__cache

    @staticmethod
    def _reverse_index(x):
        ''' Map an index into the reversed view onto the underlying list.
        None (an open slice bound) stays None '''
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            step = 1 if idx.step is None else idx.step
            if self.__reversed:
                # Bounds are mirrored; the step only changes direction
                idx = slice(
                    self._reverse_index(idx.start),
                    self._reverse_index(idx.stop),
                    -step)
                step = -step
            start, stop = idx.start, idx.stop
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            return self.exhaust()[idx]

        # Only fetch as many items as the highest requested position needs
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        return self.__cache[idx]

    def __bool__(self):
        try:
            self[-1] if self.__reversed else self[0]
        except IndexError:
            return False
        return True

    def __len__(self):
        self.exhaust()
        return len(self.__cache)

    def __reversed__(self):
        # NB: this flips the orientation of this very object and returns
        # it; no copy is made
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4023
4024
class PagedList(object):
    """ Base class for lists whose entries are fetched page by page.

    Subclasses implement getslice(); indexing and len() are built on it """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getslice(self, start=0, end=None):
        """ Return the entries from position start up to (not including)
        end as a list; end=None means "until the last entry".

        The defaults mirror those of the subclasses so that the
        argument-less call in __len__ reaches this NotImplementedError
        instead of failing with a TypeError """
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        """ Return the entry at position idx, or None when there is none.
        Raises TypeError for anything but a non-negative integer """
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        return entries[0] if entries else None
4038
4039
class OnDemandPagedList(PagedList):
    """ PagedList that requests pages one at a time until a page comes
    back shorter than pagesize.

    pagefunc:  called with a page number (starting at 0); returns an
               iterable with the entries of that page
    pagesize:  number of entries on a full page
    use_cache: keep fetched pages so each one is requested only once
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """ Return the entries from position start up to (not including)
        end as a list; end=None means "until the last entry" """
        res = []
        if end is not None and end <= start:
            # Nothing is requested. Without this guard an end that sits
            # exactly on a page boundary (e.g. getslice(0, 0)) makes the
            # endv computation below select a whole page
            return res

        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offsets inside this page; only the first page of the slice
            # can start mid-page and only the last one can end mid-page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
4090
4091
class InAdvancePagedList(PagedList):
    """ PagedList for sources whose number of pages is known up front.

    pagefunc:  called with a page number (starting at 0); returns an
               iterable with the entries of that page
    pagecount: total number of pages
    pagesize:  number of entries on a full page
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries from position start up to (not including)
        end as a list; end=None means "until the last entry" """
        res = []
        start_page = start // self._pagesize
        # Never ask for pages beyond the known page count, however large
        # the requested end is
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first fetched page can start mid-page
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
4119
4120
def uppercase_escape(s):
    """ Replace every \\UXXXXXXXX escape sequence found in s with the
    character it stands for """
    decode = codecs.getdecoder('unicode_escape')

    def unescape(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', unescape, s)
4127
4128
def lowercase_escape(s):
    """ Replace every \\uXXXX escape sequence found in s with the
    character it stands for """
    decode = codecs.getdecoder('unicode_escape')

    def unescape(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', unescape, s)
4135
4136
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2's quote() wants a byte string
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # "%" and the reserved characters are passed through untouched, so
    # text that is already escaped is not escaped a second time
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4142
4143
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host name is IDNA-encoded; every other component that may hold
    # arbitrary text is percent-escaped
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
4154
4155
def read_batch_urls(batch_fd):
    """ Read the URLs listed, one per line, in the file object batch_fd.

    Blank lines and lines starting with "#", ";" or "]" are skipped and
    trailing " #..." comments are removed. batch_fd is closed afterwards.
    Returns the list of URLs """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, maxsplit=1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        cleaned = (fixup(line) for line in fd)
        return [url for url in cleaned if url]
4173
4174
def urlencode_postdata(*args, **kargs):
    """ URL-encode the given arguments (passed straight to urlencode)
    and return the result as ASCII bytes """
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
4177
4178
def update_url_query(url, query):
    """ Return url with the entries of the query dict merged into its
    query string; entries of query replace same-named parameters of url.
    url is returned unchanged when query is empty """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
4187
4188
def update_Request(req, url=None, data=None, headers={}, query={}):
    """ Build a new request from req with some of its parts replaced.

    url:     replaces the URL of req when given
    data:    replaces the data of req when truthy
    headers: merged over the headers of req
    query:   merged into the query string of the URL
    The HTTP method (HEAD, PUT or the default one), origin_req_host,
    unverifiable and, when present, timeout are carried over from req """
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_type = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4207
4208
4209 def _multipart_encode_impl(data, boundary):
4210     content_type = 'multipart/form-data; boundary=%s' % boundary
4211
4212     out = b''
4213     for k, v in data.items():
4214         out += b'--' + boundary.encode('ascii') + b'\r\n'
4215         if isinstance(k, compat_str):
4216             k = k.encode('utf-8')
4217         if isinstance(v, compat_str):
4218             v = v.encode('utf-8')
4219         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4220         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
4221         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
4222         if boundary.encode('ascii') in content:
4223             raise ValueError('Boundary overlaps with data')
4224         out += content
4225
4226     out += b'--' + boundary.encode('ascii') + b'--\r\n'
4227
4228     return out, content_type
4229
4230
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a (body bytes, Content-Type value) tuple. A ValueError is
    raised when a specified boundary occurs inside the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if has_specified_boundary:
                raise
            # A random boundary collided with the data: draw a new one
            boundary = None
4259
4260
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """ Look up a single key, or the first usable one of several keys, in d.

    With a list or tuple of keys, the value of the first key that is
    present and not None (and, unless skip_false_values is off, not
    falsy) is returned. default is returned when nothing is found """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4269
4270
def try_get(src, getter, expected_type=None):
    """ Apply getter (one callable, or a list/tuple of them tried in
    order) to src and return the first result obtained without an
    AttributeError, KeyError, TypeError or IndexError that is also an
    instance of expected_type (when one is given). Returns None when no
    getter delivers such a result """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
    return None
4282
4283
def merge_dicts(*dicts):
    """ Merge dicts into a new dict, earlier dicts taking precedence.

    None values are ignored. A value that is already present is only
    replaced when it is an empty string and the new value is a non-empty
    string """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                current = merged[k]
                fills_blank = (
                    isinstance(v, compat_str) and v
                    and isinstance(current, compat_str) and not current)
                if not fills_blank:
                    continue
            merged[k] = v
    return merged
4296
4297
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """ Return string as a compat_str, decoding it with encoding and
    errors when it is not one already.
    NB: the default encoding is determined once, when this function is
    defined """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4300
4301
# Age limit that parse_age_limit() reports for each US rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# Age limit that parse_age_limit() reports for each "TV-..." rating label.
# The part of the key after "TV-" is also used there to build the regex
# recognising these labels
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4319
4320
def parse_age_limit(s):
    """ Turn an age rating into a numeric age limit.

    Accepted are plain ints from 0 to 21, strings such as "18" or "18+",
    the keys of US_RATINGS and the keys of TV_PARENTAL_GUIDELINES (the
    latter two case-insensitively; "TV_14" and "TV14" also work).
    Returns None for anything else """
    if type(s) is int:  # exact type check on purpose: bool is not accepted
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None

    age_mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if age_mobj:
        return int(age_mobj.group('age'))

    rating = s.upper()
    if rating in US_RATINGS:
        return US_RATINGS[rating]

    tv_mobj = re.match(
        r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), rating)
    if tv_mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_mobj.group(1)]
    return None
4336
4337
def strip_jsonp(code):
    """ Return the argument of a JSONP callback invocation, i.e. turn
    "callback(<data>);" into "<data>". An optional "window." prefix, a
    "name && name(...)" guard and trailing "//" comments are dropped as
    well. code is returned unchanged when it has no such shape """
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
4346
4347
def js_to_json(code, vars={}):
    """ Rewrite a JavaScript object/array literal so that it reads as JSON.

    code: JavaScript source text
    vars: a dict of var, val pairs to substitute; a bare identifier found
          as a key of vars is replaced by the corresponding value verbatim

    The text is scanned with one regular expression and every token it
    finds is rewritten by fix_kv(); everything in between is kept as is.
    Returns the rewritten text (it is not parsed or validated here).
    """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    # Optional whitespace and at most one comment
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for integer literals that need converting:
    # hexadecimal and octal, optionally followed by the ":" of a key
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Return the replacement text for one matched token
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, runs of "!" and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Quoted string: strip the quotes and redo the escaping for a
            # double-quoted string (escape bare ", unescape \', drop
            # backslash-newline continuations, turn \x into \u00)
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # A literal used as a key is emitted as a quoted key
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        # Strings, identifiers and decimal keys all end up double-quoted
        return '"%s"' % v

    # Alternatives, in order: double-quoted string, single-quoted string,
    # comment, comma right before a closing bracket/brace, identifier,
    # hex/octal integer (optionally a key), decimal integer used as a key,
    # run of "!"
    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4392
4393
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # The rank is the position in quality_ids; unknown ids rank lowest
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
4402
4403
# Output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types.
# NOTE(review): the values look like the filename suffix used for that
# kind of file (None where there is no fixed one) - confirm against the
# code that consumes this table
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# This is a template for a regex: "{0}" has to be replaced (presumably
# with str.format) by the pattern for the mapping key. A "%" that directly
# follows another "%" is not matched
STR_FORMAT_RE = r'''(?x)
    (?<!%)
    %
    (?P<has_key>\((?P<key>{0})\))?  # mapping key
    (?P<format>
        (?:[#0\-+ ]+)?  # conversion flags (optional)
        (?:\d+)?  # minimum field width (optional)
        (?:\.\d+)?  # precision (optional)
        [hlL]?  # length modifier (optional)
        [diouxXeEfFgGcrs]  # conversion type
    )
'''
4435
4436
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # Leave room for the ellipses so that the result is length long
    return s[:length - len(ELLIPSES)] + ELLIPSES
4445
4446
def version_tuple(v):
    """ Split the version string v at every "." and "-" and return the
    parts as a tuple of ints. Raises ValueError for non-numeric parts """
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))
4449
4450
def is_outdated_version(version, limit, assume_new=True):
    """ Tell whether version is older than limit.

    When version is empty or one of the two cannot be parsed, the answer
    is the opposite of assume_new """
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        outdated = not assume_new
    return outdated
4458
4459
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # This always answers False. The check for a zipimporter loader or a
    # frozen build that used to follow sat behind this unconditional
    # return and could never run, so that unreachable code was removed
    return False
4467
4468
def args_to_str(args):
    """ Get a short string representation for a subprocess command:
    the shell-quoted arguments joined by single spaces """
    quoted = map(compat_shlex_quote, args)
    return ' '.join(quoted)
4472
4473
def error_to_compat_str(err):
    """ Return the message of the exception err as text """
    err_str = str(err)
    if sys.version_info[0] >= 3:
        return err_str
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return err_str.decode(preferredencoding())
4481
4482
def mimetype2ext(mt):
    """ Return the file extension that goes with the MIME type mt.

    A few full types are special-cased; otherwise the subtype (the text
    after the last "/", minus any ";parameters", lower-cased) is looked
    up in a table and returned as is when it is not listed.
    Returns None when mt is None """
    if mt is None:
        return None

    FULL_TYPE_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }

    if mt in FULL_TYPE_MAP:
        return FULL_TYPE_MAP[mt]

    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()
    return SUBTYPE_MAP.get(subtype, subtype)
4520
4521
def parse_codecs(codecs_str):
    """ Split a comma-separated codecs string into video and audio codec.

    Returns a dict with the keys "vcodec" and "acodec" ("none" standing
    for a missing one), or an empty dict when codecs_str is empty or
    nothing could be made of it. Unknown codecs trigger a warning on
    stderr; exactly two unknown codecs are taken to be video and audio,
    in that order """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    split_codecs = [
        part.strip()
        for part in codecs_str.strip().strip(',').split(',')
        if part.strip()]

    vcodec = acodec = None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        # Only the first codec of each kind is kept
        if codec in VIDEO_CODECS:
            vcodec = vcodec or full_codec
        elif codec in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4551
4552
def urlhandle_detect_ext(url_handle):
    """ Work out a file extension from the headers of url_handle.

    The filename of an attachment Content-Disposition header is preferred;
    otherwise the extension is derived from the Content-Type header """
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd) if cd else None
    if mobj:
        ext = determine_ext(mobj.group('filename'), default_ext=None)
        if ext:
            return ext

    return mimetype2ext(getheader('Content-Type'))
4565
4566
def encode_data_uri(data, mime_type):
    """ Return a base64 "data:" URI holding the bytes data, labelled
    with mime_type """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4569
4570
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # No limit set by the user, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4579
4580
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # The UTF-32 marks have to be tested before the UTF-16 ones, which
    # are prefixes of them
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Without a byte order mark the data is taken to be UTF-8
    bom, enc = next(
        (pair for pair in BOMS if first_bytes.startswith(pair[0])),
        (b'', 'utf-8'))
    s = first_bytes[len(bom):].decode(enc, 'replace')

    # The result is a match object (or None), not a bool
    return re.match(r'^\s*<', s)
4599
4600
def determine_protocol(info_dict):
    """ Return the protocol of the download described by info_dict.

    An explicit "protocol" entry wins. Otherwise it is derived from the
    "url" entry: its rtmp/mms/rtsp prefix, then its m3u8/f4m extension,
    and finally its URL scheme """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4621
4622
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    header_row: the column titles
    data:       list of rows
    delim:      add a line of dashes below the header
    extraGap:   additional padding between columns
    hideEmpty:  leave out the columns that are empty in every data row
    Returns the table as one string, rows separated by newlines """

    def column_widths(rows):
        return [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]

    def keep_columns(row, mask):
        return [cell for keep, cell in zip(mask, row) if keep]

    if hideEmpty:
        # A column whose widest data cell has length 0 is dropped
        mask = column_widths(data)
        header_row = keep_columns(header_row, mask)
        data = [keep_columns(row, mask) for row in data]

    widths = column_widths([header_row] + data)
    rows = [header_row]
    if delim:
        rows.append(['-' * width for width in widths])
    rows.extend(data)

    # Every column but the last is left-aligned and padded to its width
    padded = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    format_str = ' '.join(padded) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in rows)
4643
4644
def _match_one(filter_part, dct):
    """
    Evaluate a single filter expression against the dictionary dct

    Two forms are understood: a comparison ("key OP value", where a "?" after
    the operator makes a missing key count as a match) and a presence test
    ("key" or "!key").

    @returns  The result of the comparison or presence test. When the key of
              a comparison is missing from dct, the text matched by the
              optional "?" marker is returned instead (None if it is absent)
    @raises ValueError  if the expression cannot be parsed, if an ordering
                        operator is used with a string value or if a numeric
                        value cannot be parsed
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    comparison_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    found = comparison_rex.search(filter_part)
    if found is not None:
        op_symbol = found.group('op')
        compare = COMPARISON_OPERATORS[op_symbol]
        field_value = dct.get(found.group('key'))
        quoted, bare, number = found.group('quotedstrval', 'strval', 'intval')

        # If the original field is a string and matching comparisonvalue is
        # a number we should respect the origin of the original field
        # and process comparison value as a string (see
        # https://github.com/ytdl-org/youtube-dl/issues/11082).
        number_for_string_field = (
            number is not None and isinstance(field_value, compat_str))

        if quoted is not None or bare is not None or number_for_string_field:
            if op_symbol not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % op_symbol)
            expected = quoted or bare or number
            quote_char = found.group('quote')
            if quote_char is not None:
                # Unescape the quote characters inside a quoted value
                expected = expected.replace(r'\%s' % quote_char, quote_char)
        else:
            try:
                expected = int(number)
            except ValueError:
                # Not a plain integer: try it as a file size, with and
                # without an explicit trailing "B"
                expected = parse_filesize(number)
                if expected is None:
                    expected = parse_filesize(number + 'B')
                if expected is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            number, filter_part))

        if field_value is None:
            return found.group('none_inclusive')
        return compare(field_value, expected)

    def _is_present(v):
        if isinstance(v, bool):
            return v is True
        return v is not None

    def _is_absent(v):
        if isinstance(v, bool):
            return v is False
        return v is None

    UNARY_OPERATORS = {
        '': _is_present,
        '!': _is_absent,
    }
    unary_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    found = unary_rex.search(filter_part)
    if found is None:
        raise ValueError('Invalid filter part %r' % filter_part)

    check = UNARY_OPERATORS[found.group('op')]
    return check(dct.get(found.group('key')))
4713
4714
def match_str(filter_str, dct):
    """
    Filter a dictionary with a simple string syntax

    filter_str is split on "&" and every resulting part has to match.
    Returns True (=passes filter) or False
    """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            # The remaining parts are not evaluated once one has failed
            return False
    return True
4720
4721
def match_filter_func(filter_str):
    """
    Build a function that checks an info dict against filter_str

    The returned function gives None when the info dict passes the filter
    and a message explaining that the video is skipped otherwise.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Name the video by its title, falling back to its id
            fallback_name = info_dict.get('id', 'video')
            video_title = info_dict.get('title', fallback_name)
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
4730
4731
def parse_dfxp_time_expr(time_expr):
    """
    Parse a DFXP/TTML time expression into a number of seconds

    Accepts an offset in seconds ("12", "12.5", "12.5s") or a clock value
    ("H:MM:SS", optionally followed by "." or ":" and more digits, which are
    read as the fractional part of the seconds).
    Returns a float, or None if the expression is empty or not recognised.
    """
    if not time_expr:
        return None

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match is not None:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match is None:
        return None

    hours, minutes, seconds = clock_match.groups()
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4743
4744
def srt_subtitles_timecode(seconds):
    """
    Format a number of seconds as an SRT timecode ("HH:MM:SS,mmm")

    The value is rounded to the nearest millisecond before it is split into
    its fields. Truncating the fields separately is not enough: values such
    as 5.84 are stored slightly below their decimal form and would come out
    as "00:00:05,839".
    """
    total_msecs = int(round(seconds * 1000))
    total_secs, msecs = divmod(total_msecs, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msecs)
4747
4748
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document does not contain any <p> element
    '''
    # Legacy namespace URIs are rewritten to the current ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # The only tts:* attributes that are read; everything else is ignored
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # Style id -> dict of supported properties, with inheritance resolved
    styles = {}
    # Properties taken from the style of the body/div elements
    default_style = {}

    class TTMLPElementParser(object):
        # Parser target that turns the content of one <p> element into SRT
        # text with <b>, <i>, <u> and <font> markup
        _out = ''
        # NOTE: the two lists below are class attributes that are only ever
        # mutated in place, so all the parser instances created during one
        # dfxp2srt() call share them. An element whose style does not open any
        # tag pushes an applied style that end() never pops, so such an entry
        # is still on the stack when the following elements and paragraphs are
        # processed.
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                # Effective style: defaults, then the referenced style, then
                # the tts:* attributes set on the element itself
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip what the innermost applied style already has
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened by the matching start(), innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element again and feed it to a parser driving
        # TTMLPElementParser; close() returns the collected SRT text
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style may reference a parent that appears
    # later in the document, so the style elements are scanned repeatedly
    # until no style is left waiting for its parent.
    while True:
        repeat = False
        resolved_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            # Register the style even when it sets no supported property, so
            # that the styles inheriting from it can be resolved as well
            style_props = styles.setdefault(style_id, {})
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    style_props[prop] = prop_val
        # A pass that did not register any new style leaves the next pass
        # with exactly the same input; this happens when a parent is undefined
        # or part of a cycle. Stop instead of looping forever; the styles that
        # are still unresolved are ignored.
        if not repeat or len(styles) == resolved_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The index also counts the paragraphs that are skipped below
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # No usable end time: derive it from the duration if there is one
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4911
4912
def cli_option(params, command_option, param):
    """
    Build the command line arguments for an option that takes a value

    @param params          Dictionary the value is looked up in
    @param command_option  The option as written on the command line
    @param param           Key of the value in params
    @returns  [command_option, value as a string], or an empty list if the
              value is missing or None
    """
    value = params.get(param)
    if value is None:
        return []
    # Convert every value that is present, including falsy ones such as 0,
    # so that the argument list only ever contains strings
    return [command_option, compat_str(value)]
4918
4919
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """
    Build the command line arguments for an option that takes a boolean

    The boolean stored under param in params is rendered as true_value or
    false_value. With a separator, option and value are joined into a single
    argument; otherwise they are two arguments. An empty list is returned
    when the value is missing or None.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4928
4929
def cli_valueless_option(params, command_option, param, expected_value=True):
    """
    Build the command line arguments for an option that takes no value

    Returns [command_option] if the value stored under param in params equals
    expected_value and an empty list otherwise.
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4933
4934
def cli_configuration_args(argdict, keys, default=None, use_compat=True):
    """
    Select the configured extra arguments for the first matching key

    @param argdict     Dictionary mapping lower-case keys to lists of
                       arguments. A plain list or tuple is accepted for
                       backward compatibility, and so is None
    @param keys        List or tuple of candidates, tried in order. Each
                       candidate is either a single key or a sequence of keys
                       whose argument lists are concatenated
    @param default     Returned when nothing matches; None (the default)
                       stands for a new empty list
    @param use_compat  Whether a list/tuple argdict is returned as it is
                       (if false, it is ignored and default is returned)
    @returns  The list of arguments
    """
    if default is None:
        # A fresh list on every call: the result is handed to the caller, so a
        # shared default list could be modified by one caller and leak into
        # the result of every later call
        default = []
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        if isinstance(key_list, compat_str):
            key_list = (key_list,)
        # Keys are looked up in lower case; keys without an entry are skipped
        arg_list = [
            args for args in (argdict.get(key.lower()) for key in key_list)
            if args is not None]
        if arg_list:
            return [arg for args in arg_list for arg in args]
    return default
4955
4956
class ISO639Utils(object):
    """Conversion between two-letter and three-letter language codes"""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked up; None is
        returned for an unknown code.
        """
        short_code = code[:2]
        return cls._lang_map.get(short_code)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter code of the table that maps to code,
        or None if there is none.
        """
        matching = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matching, None)
5160
5161
class ISO3166Utils(object):
    """Lookup of country names by their two-letter country code"""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-1 alpha-2 country code to the corresponding full name

        The lookup is case-insensitive; None is returned for an unknown code.
        """
        country_code = code.upper()
        return cls._country_map.get(country_code)
5420
5421
5422 class GeoUtils(object):
5423     # Major IPv4 address blocks per country
5424     _country_ip_map = {
5425         'AD': '46.172.224.0/19',
5426         'AE': '94.200.0.0/13',
5427         'AF': '149.54.0.0/17',
5428         'AG': '209.59.64.0/18',
5429         'AI': '204.14.248.0/21',
5430         'AL': '46.99.0.0/16',
5431         'AM': '46.70.0.0/15',
5432         'AO': '105.168.0.0/13',
5433         'AP': '182.50.184.0/21',
5434         'AQ': '23.154.160.0/24',
5435         'AR': '181.0.0.0/12',
5436         'AS': '202.70.112.0/20',
5437         'AT': '77.116.0.0/14',
5438         'AU': '1.128.0.0/11',
5439         'AW': '181.41.0.0/18',
5440         'AX': '185.217.4.0/22',
5441         'AZ': '5.197.0.0/16',
5442         'BA': '31.176.128.0/17',
5443         'BB': '65.48.128.0/17',
5444         'BD': '114.130.0.0/16',
5445         'BE': '57.0.0.0/8',
5446         'BF': '102.178.0.0/15',
5447         'BG': '95.42.0.0/15',
5448         'BH': '37.131.0.0/17',
5449         'BI': '154.117.192.0/18',
5450         'BJ': '137.255.0.0/16',
5451         'BL': '185.212.72.0/23',
5452         'BM': '196.12.64.0/18',
5453         'BN': '156.31.0.0/16',
5454         'BO': '161.56.0.0/16',
5455         'BQ': '161.0.80.0/20',
5456         'BR': '191.128.0.0/12',
5457         'BS': '24.51.64.0/18',
5458         'BT': '119.2.96.0/19',
5459         'BW': '168.167.0.0/16',
5460         'BY': '178.120.0.0/13',
5461         'BZ': '179.42.192.0/18',
5462         'CA': '99.224.0.0/11',
5463         'CD': '41.243.0.0/16',
5464         'CF': '197.242.176.0/21',
5465         'CG': '160.113.0.0/16',
5466         'CH': '85.0.0.0/13',
5467         'CI': '102.136.0.0/14',
5468         'CK': '202.65.32.0/19',
5469         'CL': '152.172.0.0/14',
5470         'CM': '102.244.0.0/14',
5471         'CN': '36.128.0.0/10',
5472         'CO': '181.240.0.0/12',
5473         'CR': '201.192.0.0/12',
5474         'CU': '152.206.0.0/15',
5475         'CV': '165.90.96.0/19',
5476         'CW': '190.88.128.0/17',
5477         'CY': '31.153.0.0/16',
5478         'CZ': '88.100.0.0/14',
5479         'DE': '53.0.0.0/8',
5480         'DJ': '197.241.0.0/17',
5481         'DK': '87.48.0.0/12',
5482         'DM': '192.243.48.0/20',
5483         'DO': '152.166.0.0/15',
5484         'DZ': '41.96.0.0/12',
5485         'EC': '186.68.0.0/15',
5486         'EE': '90.190.0.0/15',
5487         'EG': '156.160.0.0/11',
5488         'ER': '196.200.96.0/20',
5489         'ES': '88.0.0.0/11',
5490         'ET': '196.188.0.0/14',
5491         'EU': '2.16.0.0/13',
5492         'FI': '91.152.0.0/13',
5493         'FJ': '144.120.0.0/16',
5494         'FK': '80.73.208.0/21',
5495         'FM': '119.252.112.0/20',
5496         'FO': '88.85.32.0/19',
5497         'FR': '90.0.0.0/9',
5498         'GA': '41.158.0.0/15',
5499         'GB': '25.0.0.0/8',
5500         'GD': '74.122.88.0/21',
5501         'GE': '31.146.0.0/16',
5502         'GF': '161.22.64.0/18',
5503         'GG': '62.68.160.0/19',
5504         'GH': '154.160.0.0/12',
5505         'GI': '95.164.0.0/16',
5506         'GL': '88.83.0.0/19',
5507         'GM': '160.182.0.0/15',
5508         'GN': '197.149.192.0/18',
5509         'GP': '104.250.0.0/19',
5510         'GQ': '105.235.224.0/20',
5511         'GR': '94.64.0.0/13',
5512         'GT': '168.234.0.0/16',
5513         'GU': '168.123.0.0/16',
5514         'GW': '197.214.80.0/20',
5515         'GY': '181.41.64.0/18',
5516         'HK': '113.252.0.0/14',
5517         'HN': '181.210.0.0/16',
5518         'HR': '93.136.0.0/13',
5519         'HT': '148.102.128.0/17',
5520         'HU': '84.0.0.0/14',
5521         'ID': '39.192.0.0/10',
5522         'IE': '87.32.0.0/12',
5523         'IL': '79.176.0.0/13',
5524         'IM': '5.62.80.0/20',
5525         'IN': '117.192.0.0/10',
5526         'IO': '203.83.48.0/21',
5527         'IQ': '37.236.0.0/14',
5528         'IR': '2.176.0.0/12',
5529         'IS': '82.221.0.0/16',
5530         'IT': '79.0.0.0/10',
5531         'JE': '87.244.64.0/18',
5532         'JM': '72.27.0.0/17',
5533         'JO': '176.29.0.0/16',
5534         'JP': '133.0.0.0/8',
5535         'KE': '105.48.0.0/12',
5536         'KG': '158.181.128.0/17',
5537         'KH': '36.37.128.0/17',
5538         'KI': '103.25.140.0/22',
5539         'KM': '197.255.224.0/20',
5540         'KN': '198.167.192.0/19',
5541         'KP': '175.45.176.0/22',
5542         'KR': '175.192.0.0/10',
5543         'KW': '37.36.0.0/14',
5544         'KY': '64.96.0.0/15',
5545         'KZ': '2.72.0.0/13',
5546         'LA': '115.84.64.0/18',
5547         'LB': '178.135.0.0/16',
5548         'LC': '24.92.144.0/20',
5549         'LI': '82.117.0.0/19',
5550         'LK': '112.134.0.0/15',
5551         'LR': '102.183.0.0/16',
5552         'LS': '129.232.0.0/17',
5553         'LT': '78.56.0.0/13',
5554         'LU': '188.42.0.0/16',
5555         'LV': '46.109.0.0/16',
5556         'LY': '41.252.0.0/14',
5557         'MA': '105.128.0.0/11',
5558         'MC': '88.209.64.0/18',
5559         'MD': '37.246.0.0/16',
5560         'ME': '178.175.0.0/17',
5561         'MF': '74.112.232.0/21',
5562         'MG': '154.126.0.0/17',
5563         'MH': '117.103.88.0/21',
5564         'MK': '77.28.0.0/15',
5565         'ML': '154.118.128.0/18',
5566         'MM': '37.111.0.0/17',
5567         'MN': '49.0.128.0/17',
5568         'MO': '60.246.0.0/16',
5569         'MP': '202.88.64.0/20',
5570         'MQ': '109.203.224.0/19',
5571         'MR': '41.188.64.0/18',
5572         'MS': '208.90.112.0/22',
5573         'MT': '46.11.0.0/16',
5574         'MU': '105.16.0.0/12',
5575         'MV': '27.114.128.0/18',
5576         'MW': '102.70.0.0/15',
5577         'MX': '187.192.0.0/11',
5578         'MY': '175.136.0.0/13',
5579         'MZ': '197.218.0.0/15',
5580         'NA': '41.182.0.0/16',
5581         'NC': '101.101.0.0/18',
5582         'NE': '197.214.0.0/18',
5583         'NF': '203.17.240.0/22',
5584         'NG': '105.112.0.0/12',
5585         'NI': '186.76.0.0/15',
5586         'NL': '145.96.0.0/11',
5587         'NO': '84.208.0.0/13',
5588         'NP': '36.252.0.0/15',
5589         'NR': '203.98.224.0/19',
5590         'NU': '49.156.48.0/22',
5591         'NZ': '49.224.0.0/14',
5592         'OM': '5.36.0.0/15',
5593         'PA': '186.72.0.0/15',
5594         'PE': '186.160.0.0/14',
5595         'PF': '123.50.64.0/18',
5596         'PG': '124.240.192.0/19',
5597         'PH': '49.144.0.0/13',
5598         'PK': '39.32.0.0/11',
5599         'PL': '83.0.0.0/11',
5600         'PM': '70.36.0.0/20',
5601         'PR': '66.50.0.0/16',
5602         'PS': '188.161.0.0/16',
5603         'PT': '85.240.0.0/13',
5604         'PW': '202.124.224.0/20',
5605         'PY': '181.120.0.0/14',
5606         'QA': '37.210.0.0/15',
5607         'RE': '102.35.0.0/16',
5608         'RO': '79.112.0.0/13',
5609         'RS': '93.86.0.0/15',
5610         'RU': '5.136.0.0/13',
5611         'RW': '41.186.0.0/16',
5612         'SA': '188.48.0.0/13',
5613         'SB': '202.1.160.0/19',
5614         'SC': '154.192.0.0/11',
5615         'SD': '102.120.0.0/13',
5616         'SE': '78.64.0.0/12',
5617         'SG': '8.128.0.0/10',
5618         'SI': '188.196.0.0/14',
5619         'SK': '78.98.0.0/15',
5620         'SL': '102.143.0.0/17',
5621         'SM': '89.186.32.0/19',
5622         'SN': '41.82.0.0/15',
5623         'SO': '154.115.192.0/18',
5624         'SR': '186.179.128.0/17',
5625         'SS': '105.235.208.0/21',
5626         'ST': '197.159.160.0/19',
5627         'SV': '168.243.0.0/16',
5628         'SX': '190.102.0.0/20',
5629         'SY': '5.0.0.0/16',
5630         'SZ': '41.84.224.0/19',
5631         'TC': '65.255.48.0/20',
5632         'TD': '154.68.128.0/19',
5633         'TG': '196.168.0.0/14',
5634         'TH': '171.96.0.0/13',
5635         'TJ': '85.9.128.0/18',
5636         'TK': '27.96.24.0/21',
5637         'TL': '180.189.160.0/20',
5638         'TM': '95.85.96.0/19',
5639         'TN': '197.0.0.0/11',
5640         'TO': '175.176.144.0/21',
5641         'TR': '78.160.0.0/11',
5642         'TT': '186.44.0.0/15',
5643         'TV': '202.2.96.0/19',
5644         'TW': '120.96.0.0/11',
5645         'TZ': '156.156.0.0/14',
5646         'UA': '37.52.0.0/14',
5647         'UG': '102.80.0.0/13',
5648         'US': '6.0.0.0/8',
5649         'UY': '167.56.0.0/13',
5650         'UZ': '84.54.64.0/18',
5651         'VA': '212.77.0.0/19',
5652         'VC': '207.191.240.0/21',
5653         'VE': '186.88.0.0/13',
5654         'VG': '66.81.192.0/20',
5655         'VI': '146.226.0.0/16',
5656         'VN': '14.160.0.0/11',
5657         'VU': '202.80.32.0/20',
5658         'WF': '117.20.32.0/21',
5659         'WS': '202.4.32.0/19',
5660         'YE': '134.35.0.0/16',
5661         'YT': '41.242.116.0/22',
5662         'ZA': '41.0.0.0/11',
5663         'ZM': '102.144.0.0/13',
5664         'ZW': '102.177.192.0/18',
5665     }
5666
5667     @classmethod
5668     def random_ipv4(cls, code_or_block):
5669         if len(code_or_block) == 2:
5670             block = cls._country_ip_map.get(code_or_block.upper())
5671             if not block:
5672                 return None
5673         else:
5674             block = code_or_block
5675         addr, preflen = block.split('/')
5676         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5677         addr_max = addr_min | (0xffffffff >> int(preflen))
5678         return compat_str(socket.inet_ntoa(
5679             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5680
5681
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets an individual request choose its own proxy.

    A request may carry a 'Ytdl-request-proxy' header; its value replaces the
    proxy configured for the handler and the header is removed before the
    request goes any further.
    """

    def __init__(self, proxies=None):
        # Install default openers bound to the '__noproxy__' sentinel before
        # running the base initializer with the configured proxies
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Route `req` through `proxy`, honouring a per-request override.

        Returns None when no proxy applies or when a SOCKS proxy is requested;
        otherwise defers to the base class implementation.
        """
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = override

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme not in ('socks', 'socks4', 'socks4a', 'socks5'):
            return compat_urllib_request.ProxyHandler.proxy_open(
                self, req, proxy, type)

        # Only tag the request here: wrapping the socket with SOCKS is left to
        # yt-dlp's own http/https handlers
        req.add_header('Ytdl-socks-proxy', proxy)
        return None
5705
5706
5707 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5708 # released into Public Domain
5709 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5710
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.

    Zero and negative values are encoded as a single zero byte (before
    padding).
    """
    n = int(n)
    if n > 0:
        # Minimal big-endian encoding: no leading zero bytes
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    else:
        s = b'\000'
    # Left-pad with zero bytes up to the next multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5739
5740
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes(). An empty byte
    string yields 0.
    """
    return int.from_bytes(s, 'big')
5756
5757
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    RSA encryption as done by OHDave's library (http://www.ohdave.com/rsa/).

    Input:
        data: bytes-like object holding the data to encrypt
        exponent, modulus: the RSA parameters e and N, both integers
    Output: the encrypted value as a lowercase hex string

    Limitation: only a single block can be encrypted
    '''

    # The byte order is reversed before the data is read as one big integer
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    ciphertext = pow(plaintext, exponent, modulus)
    return format(ciphertext, 'x')
5773
5774
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 v1.5 scheme (block type 2)

    The result has the layout [0, 2] + padding + [0] + data, where padding
    consists of random *non-zero* bytes. A zero byte inside the padding would
    be taken for the separator when the block is decoded, so zeros must never
    be generated there.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves less than 8 bytes for padding
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    padding = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + padding + [0] + list(data)
5788
5789
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer num in base n

    @param num      integer to encode
    @param n        base; at most the length of the digit table
    @param table    digit characters to use; defaults to the first n
                    characters of 0-9, a-z, A-Z
    @returns        num written in base n, most significant digit first
    @raises ValueError  if n exceeds the table length, or if a non-zero num
                        cannot be encoded (negative num, or n < 2)
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Without these checks the division loop below would never terminate
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small to encode %d' % (n, num))

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    # Digits were produced least significant first
    return ''.join(reversed(digits))
5806
5807
def decode_packed_codes(code):
    """Expand code matched by PACKED_CODES_RE back into its unpacked form.

    The match supplies the obfuscated code, the numeric base, the number of
    symbols and a '|'-separated symbol list. Every word of the obfuscated
    code is then replaced through a table keyed by the base-n encoding of
    each symbol index; an empty symbol stands for its own key.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    radix = int(base)
    remaining = int(count)
    words = symbols.split('|')

    lookup = {}
    while remaining:
        remaining -= 1
        token = encode_base_n(remaining, radix)
        lookup[token] = words[remaining] or token

    def _substitute(word_match):
        return lookup[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', _substitute, obfuscated_code)
5824
5825
def caesar(s, alphabet, shift):
    """Apply a Caesar cipher to s

    Every character of s found in alphabet is replaced by the character
    shift positions further on (wrapping around); all other characters are
    kept as they are.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = {}
    for position, symbol in enumerate(alphabet):
        # setdefault: for a repeated symbol its first position is the one that counts
        shifted.setdefault(symbol, alphabet[(position + shift) % size])
    return ''.join(shifted.get(c, c) for c in s)
5833
5834
def rot47(s):
    """Apply ROT47 to s: a Caesar shift of 47 over the 94 characters '!' to '~'"""
    printable = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable, 47)
5837
5838
def parse_m3u8_attributes(attrib):
    """Parse an m3u8 attribute list (KEY=value,KEY="quoted value",...) into a dict

    Surrounding double quotes are removed from quoted values. When a key
    occurs more than once, the last value is kept.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        name: raw[1:-1] if raw.startswith('"') else raw
        for name, raw in pairs
    }
5846
5847
def urshift(val, n):
    """Unsigned right shift of val by n bits, treating negative values as 32-bit"""
    if val < 0:
        # Reinterpret the negative number as its unsigned 32-bit counterpart
        val += 0x100000000
    return val >> n
5850
5851
5852 # Based on png2str() written by @gdkchan and improved by @yokrysty
5853 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG byte string into rows of unfiltered byte values

    Only width and height are read from the IHDR chunk; rows are always
    decoded as width * 3 bytes.
    NOTE(review): this presumably restricts the decoder to non-interlaced
    8-bit RGB images - confirm against the callers.

    @param png_data  content of a PNG file as a byte string
    @returns         (width, height, pixels); pixels holds one list per row,
                     each with width * 3 integers in the range 0-255
    @raises IOError  if the PNG signature or the leading IHDR chunk is
                     missing, or if the file contains no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    # Everything after the 8 byte signature, i.e. the sequence of chunks
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned struct formats, selected by the length of the input
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk is: 4 byte length, 4 byte type, `length` bytes of data, 4 byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The check above guarantees that the first chunk is IHDR
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The compressed image data is the concatenation of all IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Number of bytes per row, at 3 bytes per pixel
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # idx is an offset into the unfiltered data as if all rows were concatenated
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each row of the decompressed data starts with one filter type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before it is filled so that _get_pixel can already read from it
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # The left neighbour is the same channel of the previous pixel (3 bytes back)
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Filter type 0 (None) matches no branch: the byte is used as is
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # c is the upper left neighbour
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use the neighbour closest to p; ties prefer a, then b
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5957
5958
def write_xattr(path, key, value):
    """Set the extended attribute key of the file at path to value

    The first available mechanism is used:
      1. the python module `xattr` (either pyxattr or xattr);
      2. if that module cannot be imported and compat_os_name is 'nt':
         an NTFS Alternate Data Stream named path + ':' + key;
      3. otherwise the `setfattr` or `xattr` command line tool.

    @param path   path of the file to modify
    @param key    name of the attribute; must not contain ':' when written
                  as an Alternate Data Stream
    @param value  attribute value as a byte string (it is decoded as UTF-8
                  for the command line tools and written in binary mode for
                  Alternate Data Streams)
    @raises XAttrUnavailableError  if pyxattr is older than 0.5.0 or no
                                   mechanism is available
    @raises XAttrMetadataError     if setting the attribute fails
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): despite the wording of the message, nothing
                # in this function handles the exception, so presumably no
                # fallback takes place here - confirm
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No python xattr module: use a platform specific alternative
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The value is passed as a command line argument, hence as text
                value = value.decode('utf-8')
                # setfattr is preferred when both tools are available
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                # A non-zero exit status is reported together with the tool's stderr output
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6041
6042
def random_birthday(year_field, month_field, day_field):
    """Generate a random birthday between 1950-01-01 and 1995-12-31 (inclusive)

    @returns  a dict mapping the three given field names to the year, month
              and day of the birthday, each as a string without zero padding
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    field_names = (year_field, month_field, day_field)
    parts = (birthday.year, birthday.month, birthday.day)
    return dict(zip(field_names, map(str, parts)))
6053
6054
# Templates for internet shortcut files, which are plain text files.
# Each one is filled in with the % operator and a mapping that provides
# 'url' (and 'filename' for the desktop entry). The .lstrip() calls remove
# the newline that follows the opening quotes.

# "[InternetShortcut]" file
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# XML property list (Apple PLIST 1.0 DTD) with a single URL entry
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# "[Desktop Entry]" file of type Link
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6080
6081
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    The hostname is converted to Punycode. An explicit port is kept, except for port 80 on the `http` scheme, where it is the default anyway. IRIs without a hostname (e.g. relative references) are supported.

    @param iri          the IRI as a string
    @returns            the corresponding URI as a string
    @raises ValueError  if the network location contains an IPv6 address
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no host part; there is nothing to encode then.
    if iri_parts.hostname is not None:
        net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 is redundant only for `http`. Dropping it for any other scheme would make the URI point to that scheme's own default port instead.
    port = iri_parts.port
    if port is not None and not (port == 80 and iri_parts.scheme == 'http'):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6124
6125
def to_high_limit_path(path):
    """Return path in a form that is not subject to MAX_PATH on Windows

    On win32 and cygwin the absolute path is returned, prefixed with `\\\\?\\`.
    On every other platform path is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    return r'\\?\ '.rstrip() + os.path.abspath(path)
6132
6133
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Format obj[field] with template, or return default if there is nothing to format

    @param obj       mapping to read the field from
    @param field     key to look up; default is used when it is missing
    @param template  %-format string applied to the value
    @param ignore    values that are replaced by default instead of formatted
    @param default   result for missing or ignored values
    @param func      optional function applied to a non-ignored value before
                     formatting; its result is checked against ignore as well
    """
    value = obj.get(field, default)
    if func and value not in ignore:
        value = func(value)
    if value in ignore:
        return default
    return template % value
6139
6140
def clean_podcast_url(url):
    """Remove the prefixes of known podcast tracking services from url

    Every occurrence of such a prefix, including the slash that ends it, is
    removed, so that chained prefixes are stripped too.
    """
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
6156
6157
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Generate a random version 4 UUID in its canonical text form

    In the template below, `x` stands for any hex digit, whereas `y` holds
    the variant bits and thus has to be one of 8, 9, a or b.
    """
    def _random_digit(mobj):
        if mobj.group(0) == 'y':
            # _HEX_TABLE[8:12] is '89ab'
            return _HEX_TABLE[random.randint(8, 11)]
        return _HEX_TABLE[random.randint(0, 15)]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6163
6164
def make_dir(path, to_screen=None):
    """Create the directory that is to contain the file at path

    @param path       path of a file; only its directory part is created
    @param to_screen  optional callable that is given an error message if
                      the directory cannot be created
    @returns          True if the directory exists afterwards (or path has
                      no directory part), False if it could not be created
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            # exist_ok: the directory may get created in between by someone else
            os.makedirs(dn, exist_ok=True)
        return True
    except (OSError, IOError) as err:
        # Reporting is optional; to_screen defaults to None
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6175
6176
def get_executable_path():
    """Return the absolute path of the directory the program is run from

    This is the directory of sys.executable for frozen (PyInstaller) builds,
    two levels above this module when it is loaded from a ZIP file, and one
    level above this module otherwise.
    """
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        location = os.path.dirname(sys.executable)
    else:
        # Running from ZIP if the module was loaded by a zipimporter
        from_zip = isinstance(globals().get('__loader__'), zipimporter)
        location = os.path.join(os.path.dirname(__file__), '../..' if from_zip else '..')
    return os.path.abspath(location)
6186
6187
def load_plugins(name, suffix, namespace):
    """Load plugin classes from the module name inside the `ytdlp_plugins` directory

    The directory is looked up next to the program (see get_executable_path).
    Every attribute of the module whose name ends with suffix and is not yet
    present in namespace is added to namespace. A module that cannot be
    imported is silently ignored.

    @param name       name of the plugin module
    @param suffix     required ending of the attribute names to load
    @param namespace  dict that receives the loaded objects, keyed by name
    @returns          list of the newly loaded objects
    """
    module_info = [None]
    loaded = []
    try:
        module_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        module = imp.load_module(name, *module_info)
        for attr in dir(module):
            if attr in namespace or not attr.endswith(suffix):
                continue
            klass = getattr(module, attr)
            loaded.append(klass)
            namespace[attr] = klass
    except ImportError:
        pass
    finally:
        # The first item is the open file of the module, if there is one
        module_file = module_info[0]
        if module_file is not None:
            module_file.close()
    return loaded
6209
6210
def traverse_obj(obj, keys, *, casesense=True, is_user_input=False, traverse_string=False):
    ''' Follow a sequence of keys through nested dicts/lists/tuples
    @param casesense        Whether dictionary keys are matched case sensitively
    @param is_user_input    Whether the keys come from user input. If True,
                            keys used on non-dicts are converted from strings
                            to int (or to slice, if they contain ":")
    @param traverse_string  Whether to traverse inside strings. If True, any
                            object that is neither dict, list nor tuple is
                            converted into a string and indexed
    @returns                The object found at the end of the path. None if
                            a non-traversable object is met while
                            traverse_string is False
    '''
    for key in list(keys):
        if isinstance(obj, dict):
            assert isinstance(key, compat_str)
            if casesense:
                obj = obj.get(key)
            else:
                # Compare the lowercased key against the lowercased dict keys
                lowered = {k.lower(): v for k, v in obj.items()}
                obj = lowered.get(key.lower())
            continue

        if is_user_input:
            if ':' in key:
                key = slice(*map(int_or_none, key.split(':')))
            else:
                key = int_or_none(key)
        if not isinstance(obj, (list, tuple)):
            if not traverse_string:
                return None
            obj = compat_str(obj)
        assert isinstance(key, (int, slice))
        obj = try_get(obj, lambda x: x[key])
    return obj
6240
6241
def traverse_dict(dictn, keys, casesense=True):
    ''' Kept for backward compatibility only; not for use in new code.
    Same as traverse_obj with is_user_input and traverse_string enabled '''
    legacy_options = {
        'casesense': casesense,
        'is_user_input': True,
        'traverse_string': True,
    }
    return traverse_obj(dictn, keys, **legacy_options)