yt_dlp/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import imp
  20 import io
  21 import itertools
  22 import json
  23 import locale
  24 import math
  25 import operator
  26 import os
  27 import platform
  28 import random
  29 import re
  30 import socket
  31 import ssl
  32 import subprocess
  33 import sys
  34 import tempfile
  35 import time
  36 import traceback
  37 import xml.etree.ElementTree
  38 import zlib
  39
  40 from .compat import (
  41     compat_HTMLParseError,
  42     compat_HTMLParser,
  43     compat_HTTPError,
  44     compat_basestring,
  45     compat_chr,
  46     compat_cookiejar,
  47     compat_ctypes_WINFUNCTYPE,
  48     compat_etree_fromstring,
  49     compat_expanduser,
  50     compat_html_entities,
  51     compat_html_entities_html5,
  52     compat_http_client,
  53     compat_integer_types,
  54     compat_numeric_types,
  55     compat_kwargs,
  56     compat_os_name,
  57     compat_parse_qs,
  58     compat_shlex_quote,
  59     compat_str,
  60     compat_struct_pack,
  61     compat_struct_unpack,
  62     compat_urllib_error,
  63     compat_urllib_parse,
  64     compat_urllib_parse_urlencode,
  65     compat_urllib_parse_urlparse,
  66     compat_urllib_parse_urlunparse,
  67     compat_urllib_parse_quote,
  68     compat_urllib_parse_quote_plus,
  69     compat_urllib_parse_unquote_plus,
  70     compat_urllib_request,
  71     compat_urlparse,
  72     compat_xpath,
  73 )
  74
  75 from .socks import (
  76     ProxyType,
  77     sockssocket,
  78 )
  79
  80
  81 def register_socks_protocols():
  82     # "Register" SOCKS protocols
  83     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  84     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  85     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  86         if scheme not in compat_urlparse.uses_netloc:
  87             compat_urlparse.uses_netloc.append(scheme)
  88
  89
  90 # This is not clearly defined otherwise
  91 compiled_regex_type = type(re.compile(''))
  92
  93
  94 def random_user_agent():
  95     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  96     _CHROME_VERSIONS = (
  97         '74.0.3729.129',
  98         '76.0.3780.3',
  99         '76.0.3780.2',
 100         '74.0.3729.128',
 101         '76.0.3780.1',
 102         '76.0.3780.0',
 103         '75.0.3770.15',
 104         '74.0.3729.127',
 105         '74.0.3729.126',
 106         '76.0.3779.1',
 107         '76.0.3779.0',
 108         '75.0.3770.14',
 109         '74.0.3729.125',
 110         '76.0.3778.1',
 111         '76.0.3778.0',
 112         '75.0.3770.13',
 113         '74.0.3729.124',
 114         '74.0.3729.123',
 115         '73.0.3683.121',
 116         '76.0.3777.1',
 117         '76.0.3777.0',
 118         '75.0.3770.12',
 119         '74.0.3729.122',
 120         '76.0.3776.4',
 121         '75.0.3770.11',
 122         '74.0.3729.121',
 123         '76.0.3776.3',
 124         '76.0.3776.2',
 125         '73.0.3683.120',
 126         '74.0.3729.120',
 127         '74.0.3729.119',
 128         '74.0.3729.118',
 129         '76.0.3776.1',
 130         '76.0.3776.0',
 131         '76.0.3775.5',
 132         '75.0.3770.10',
 133         '74.0.3729.117',
 134         '76.0.3775.4',
 135         '76.0.3775.3',
 136         '74.0.3729.116',
 137         '75.0.3770.9',
 138         '76.0.3775.2',
 139         '76.0.3775.1',
 140         '76.0.3775.0',
 141         '75.0.3770.8',
 142         '74.0.3729.115',
 143         '74.0.3729.114',
 144         '76.0.3774.1',
 145         '76.0.3774.0',
 146         '75.0.3770.7',
 147         '74.0.3729.113',
 148         '74.0.3729.112',
 149         '74.0.3729.111',
 150         '76.0.3773.1',
 151         '76.0.3773.0',
 152         '75.0.3770.6',
 153         '74.0.3729.110',
 154         '74.0.3729.109',
 155         '76.0.3772.1',
 156         '76.0.3772.0',
 157         '75.0.3770.5',
 158         '74.0.3729.108',
 159         '74.0.3729.107',
 160         '76.0.3771.1',
 161         '76.0.3771.0',
 162         '75.0.3770.4',
 163         '74.0.3729.106',
 164         '74.0.3729.105',
 165         '75.0.3770.3',
 166         '74.0.3729.104',
 167         '74.0.3729.103',
 168         '74.0.3729.102',
 169         '75.0.3770.2',
 170         '74.0.3729.101',
 171         '75.0.3770.1',
 172         '75.0.3770.0',
 173         '74.0.3729.100',
 174         '75.0.3769.5',
 175         '75.0.3769.4',
 176         '74.0.3729.99',
 177         '75.0.3769.3',
 178         '75.0.3769.2',
 179         '75.0.3768.6',
 180         '74.0.3729.98',
 181         '75.0.3769.1',
 182         '75.0.3769.0',
 183         '74.0.3729.97',
 184         '73.0.3683.119',
 185         '73.0.3683.118',
 186         '74.0.3729.96',
 187         '75.0.3768.5',
 188         '75.0.3768.4',
 189         '75.0.3768.3',
 190         '75.0.3768.2',
 191         '74.0.3729.95',
 192         '74.0.3729.94',
 193         '75.0.3768.1',
 194         '75.0.3768.0',
 195         '74.0.3729.93',
 196         '74.0.3729.92',
 197         '73.0.3683.117',
 198         '74.0.3729.91',
 199         '75.0.3766.3',
 200         '74.0.3729.90',
 201         '75.0.3767.2',
 202         '75.0.3767.1',
 203         '75.0.3767.0',
 204         '74.0.3729.89',
 205         '73.0.3683.116',
 206         '75.0.3766.2',
 207         '74.0.3729.88',
 208         '75.0.3766.1',
 209         '75.0.3766.0',
 210         '74.0.3729.87',
 211         '73.0.3683.115',
 212         '74.0.3729.86',
 213         '75.0.3765.1',
 214         '75.0.3765.0',
 215         '74.0.3729.85',
 216         '73.0.3683.114',
 217         '74.0.3729.84',
 218         '75.0.3764.1',
 219         '75.0.3764.0',
 220         '74.0.3729.83',
 221         '73.0.3683.113',
 222         '75.0.3763.2',
 223         '75.0.3761.4',
 224         '74.0.3729.82',
 225         '75.0.3763.1',
 226         '75.0.3763.0',
 227         '74.0.3729.81',
 228         '73.0.3683.112',
 229         '75.0.3762.1',
 230         '75.0.3762.0',
 231         '74.0.3729.80',
 232         '75.0.3761.3',
 233         '74.0.3729.79',
 234         '73.0.3683.111',
 235         '75.0.3761.2',
 236         '74.0.3729.78',
 237         '74.0.3729.77',
 238         '75.0.3761.1',
 239         '75.0.3761.0',
 240         '73.0.3683.110',
 241         '74.0.3729.76',
 242         '74.0.3729.75',
 243         '75.0.3760.0',
 244         '74.0.3729.74',
 245         '75.0.3759.8',
 246         '75.0.3759.7',
 247         '75.0.3759.6',
 248         '74.0.3729.73',
 249         '75.0.3759.5',
 250         '74.0.3729.72',
 251         '73.0.3683.109',
 252         '75.0.3759.4',
 253         '75.0.3759.3',
 254         '74.0.3729.71',
 255         '75.0.3759.2',
 256         '74.0.3729.70',
 257         '73.0.3683.108',
 258         '74.0.3729.69',
 259         '75.0.3759.1',
 260         '75.0.3759.0',
 261         '74.0.3729.68',
 262         '73.0.3683.107',
 263         '74.0.3729.67',
 264         '75.0.3758.1',
 265         '75.0.3758.0',
 266         '74.0.3729.66',
 267         '73.0.3683.106',
 268         '74.0.3729.65',
 269         '75.0.3757.1',
 270         '75.0.3757.0',
 271         '74.0.3729.64',
 272         '73.0.3683.105',
 273         '74.0.3729.63',
 274         '75.0.3756.1',
 275         '75.0.3756.0',
 276         '74.0.3729.62',
 277         '73.0.3683.104',
 278         '75.0.3755.3',
 279         '75.0.3755.2',
 280         '73.0.3683.103',
 281         '75.0.3755.1',
 282         '75.0.3755.0',
 283         '74.0.3729.61',
 284         '73.0.3683.102',
 285         '74.0.3729.60',
 286         '75.0.3754.2',
 287         '74.0.3729.59',
 288         '75.0.3753.4',
 289         '74.0.3729.58',
 290         '75.0.3754.1',
 291         '75.0.3754.0',
 292         '74.0.3729.57',
 293         '73.0.3683.101',
 294         '75.0.3753.3',
 295         '75.0.3752.2',
 296         '75.0.3753.2',
 297         '74.0.3729.56',
 298         '75.0.3753.1',
 299         '75.0.3753.0',
 300         '74.0.3729.55',
 301         '73.0.3683.100',
 302         '74.0.3729.54',
 303         '75.0.3752.1',
 304         '75.0.3752.0',
 305         '74.0.3729.53',
 306         '73.0.3683.99',
 307         '74.0.3729.52',
 308         '75.0.3751.1',
 309         '75.0.3751.0',
 310         '74.0.3729.51',
 311         '73.0.3683.98',
 312         '74.0.3729.50',
 313         '75.0.3750.0',
 314         '74.0.3729.49',
 315         '74.0.3729.48',
 316         '74.0.3729.47',
 317         '75.0.3749.3',
 318         '74.0.3729.46',
 319         '73.0.3683.97',
 320         '75.0.3749.2',
 321         '74.0.3729.45',
 322         '75.0.3749.1',
 323         '75.0.3749.0',
 324         '74.0.3729.44',
 325         '73.0.3683.96',
 326         '74.0.3729.43',
 327         '74.0.3729.42',
 328         '75.0.3748.1',
 329         '75.0.3748.0',
 330         '74.0.3729.41',
 331         '75.0.3747.1',
 332         '73.0.3683.95',
 333         '75.0.3746.4',
 334         '74.0.3729.40',
 335         '74.0.3729.39',
 336         '75.0.3747.0',
 337         '75.0.3746.3',
 338         '75.0.3746.2',
 339         '74.0.3729.38',
 340         '75.0.3746.1',
 341         '75.0.3746.0',
 342         '74.0.3729.37',
 343         '73.0.3683.94',
 344         '75.0.3745.5',
 345         '75.0.3745.4',
 346         '75.0.3745.3',
 347         '75.0.3745.2',
 348         '74.0.3729.36',
 349         '75.0.3745.1',
 350         '75.0.3745.0',
 351         '75.0.3744.2',
 352         '74.0.3729.35',
 353         '73.0.3683.93',
 354         '74.0.3729.34',
 355         '75.0.3744.1',
 356         '75.0.3744.0',
 357         '74.0.3729.33',
 358         '73.0.3683.92',
 359         '74.0.3729.32',
 360         '74.0.3729.31',
 361         '73.0.3683.91',
 362         '75.0.3741.2',
 363         '75.0.3740.5',
 364         '74.0.3729.30',
 365         '75.0.3741.1',
 366         '75.0.3741.0',
 367         '74.0.3729.29',
 368         '75.0.3740.4',
 369         '73.0.3683.90',
 370         '74.0.3729.28',
 371         '75.0.3740.3',
 372         '73.0.3683.89',
 373         '75.0.3740.2',
 374         '74.0.3729.27',
 375         '75.0.3740.1',
 376         '75.0.3740.0',
 377         '74.0.3729.26',
 378         '73.0.3683.88',
 379         '73.0.3683.87',
 380         '74.0.3729.25',
 381         '75.0.3739.1',
 382         '75.0.3739.0',
 383         '73.0.3683.86',
 384         '74.0.3729.24',
 385         '73.0.3683.85',
 386         '75.0.3738.4',
 387         '75.0.3738.3',
 388         '75.0.3738.2',
 389         '75.0.3738.1',
 390         '75.0.3738.0',
 391         '74.0.3729.23',
 392         '73.0.3683.84',
 393         '74.0.3729.22',
 394         '74.0.3729.21',
 395         '75.0.3737.1',
 396         '75.0.3737.0',
 397         '74.0.3729.20',
 398         '73.0.3683.83',
 399         '74.0.3729.19',
 400         '75.0.3736.1',
 401         '75.0.3736.0',
 402         '74.0.3729.18',
 403         '73.0.3683.82',
 404         '74.0.3729.17',
 405         '75.0.3735.1',
 406         '75.0.3735.0',
 407         '74.0.3729.16',
 408         '73.0.3683.81',
 409         '75.0.3734.1',
 410         '75.0.3734.0',
 411         '74.0.3729.15',
 412         '73.0.3683.80',
 413         '74.0.3729.14',
 414         '75.0.3733.1',
 415         '75.0.3733.0',
 416         '75.0.3732.1',
 417         '74.0.3729.13',
 418         '74.0.3729.12',
 419         '73.0.3683.79',
 420         '74.0.3729.11',
 421         '75.0.3732.0',
 422         '74.0.3729.10',
 423         '73.0.3683.78',
 424         '74.0.3729.9',
 425         '74.0.3729.8',
 426         '74.0.3729.7',
 427         '75.0.3731.3',
 428         '75.0.3731.2',
 429         '75.0.3731.0',
 430         '74.0.3729.6',
 431         '73.0.3683.77',
 432         '73.0.3683.76',
 433         '75.0.3730.5',
 434         '75.0.3730.4',
 435         '73.0.3683.75',
 436         '74.0.3729.5',
 437         '73.0.3683.74',
 438         '75.0.3730.3',
 439         '75.0.3730.2',
 440         '74.0.3729.4',
 441         '73.0.3683.73',
 442         '73.0.3683.72',
 443         '75.0.3730.1',
 444         '75.0.3730.0',
 445         '74.0.3729.3',
 446         '73.0.3683.71',
 447         '74.0.3729.2',
 448         '73.0.3683.70',
 449         '74.0.3729.1',
 450         '74.0.3729.0',
 451         '74.0.3726.4',
 452         '73.0.3683.69',
 453         '74.0.3726.3',
 454         '74.0.3728.0',
 455         '74.0.3726.2',
 456         '73.0.3683.68',
 457         '74.0.3726.1',
 458         '74.0.3726.0',
 459         '74.0.3725.4',
 460         '73.0.3683.67',
 461         '73.0.3683.66',
 462         '74.0.3725.3',
 463         '74.0.3725.2',
 464         '74.0.3725.1',
 465         '74.0.3724.8',
 466         '74.0.3725.0',
 467         '73.0.3683.65',
 468         '74.0.3724.7',
 469         '74.0.3724.6',
 470         '74.0.3724.5',
 471         '74.0.3724.4',
 472         '74.0.3724.3',
 473         '74.0.3724.2',
 474         '74.0.3724.1',
 475         '74.0.3724.0',
 476         '73.0.3683.64',
 477         '74.0.3723.1',
 478         '74.0.3723.0',
 479         '73.0.3683.63',
 480         '74.0.3722.1',
 481         '74.0.3722.0',
 482         '73.0.3683.62',
 483         '74.0.3718.9',
 484         '74.0.3702.3',
 485         '74.0.3721.3',
 486         '74.0.3721.2',
 487         '74.0.3721.1',
 488         '74.0.3721.0',
 489         '74.0.3720.6',
 490         '73.0.3683.61',
 491         '72.0.3626.122',
 492         '73.0.3683.60',
 493         '74.0.3720.5',
 494         '72.0.3626.121',
 495         '74.0.3718.8',
 496         '74.0.3720.4',
 497         '74.0.3720.3',
 498         '74.0.3718.7',
 499         '74.0.3720.2',
 500         '74.0.3720.1',
 501         '74.0.3720.0',
 502         '74.0.3718.6',
 503         '74.0.3719.5',
 504         '73.0.3683.59',
 505         '74.0.3718.5',
 506         '74.0.3718.4',
 507         '74.0.3719.4',
 508         '74.0.3719.3',
 509         '74.0.3719.2',
 510         '74.0.3719.1',
 511         '73.0.3683.58',
 512         '74.0.3719.0',
 513         '73.0.3683.57',
 514         '73.0.3683.56',
 515         '74.0.3718.3',
 516         '73.0.3683.55',
 517         '74.0.3718.2',
 518         '74.0.3718.1',
 519         '74.0.3718.0',
 520         '73.0.3683.54',
 521         '74.0.3717.2',
 522         '73.0.3683.53',
 523         '74.0.3717.1',
 524         '74.0.3717.0',
 525         '73.0.3683.52',
 526         '74.0.3716.1',
 527         '74.0.3716.0',
 528         '73.0.3683.51',
 529         '74.0.3715.1',
 530         '74.0.3715.0',
 531         '73.0.3683.50',
 532         '74.0.3711.2',
 533         '74.0.3714.2',
 534         '74.0.3713.3',
 535         '74.0.3714.1',
 536         '74.0.3714.0',
 537         '73.0.3683.49',
 538         '74.0.3713.1',
 539         '74.0.3713.0',
 540         '72.0.3626.120',
 541         '73.0.3683.48',
 542         '74.0.3712.2',
 543         '74.0.3712.1',
 544         '74.0.3712.0',
 545         '73.0.3683.47',
 546         '72.0.3626.119',
 547         '73.0.3683.46',
 548         '74.0.3710.2',
 549         '72.0.3626.118',
 550         '74.0.3711.1',
 551         '74.0.3711.0',
 552         '73.0.3683.45',
 553         '72.0.3626.117',
 554         '74.0.3710.1',
 555         '74.0.3710.0',
 556         '73.0.3683.44',
 557         '72.0.3626.116',
 558         '74.0.3709.1',
 559         '74.0.3709.0',
 560         '74.0.3704.9',
 561         '73.0.3683.43',
 562         '72.0.3626.115',
 563         '74.0.3704.8',
 564         '74.0.3704.7',
 565         '74.0.3708.0',
 566         '74.0.3706.7',
 567         '74.0.3704.6',
 568         '73.0.3683.42',
 569         '72.0.3626.114',
 570         '74.0.3706.6',
 571         '72.0.3626.113',
 572         '74.0.3704.5',
 573         '74.0.3706.5',
 574         '74.0.3706.4',
 575         '74.0.3706.3',
 576         '74.0.3706.2',
 577         '74.0.3706.1',
 578         '74.0.3706.0',
 579         '73.0.3683.41',
 580         '72.0.3626.112',
 581         '74.0.3705.1',
 582         '74.0.3705.0',
 583         '73.0.3683.40',
 584         '72.0.3626.111',
 585         '73.0.3683.39',
 586         '74.0.3704.4',
 587         '73.0.3683.38',
 588         '74.0.3704.3',
 589         '74.0.3704.2',
 590         '74.0.3704.1',
 591         '74.0.3704.0',
 592         '73.0.3683.37',
 593         '72.0.3626.110',
 594         '72.0.3626.109',
 595         '74.0.3703.3',
 596         '74.0.3703.2',
 597         '73.0.3683.36',
 598         '74.0.3703.1',
 599         '74.0.3703.0',
 600         '73.0.3683.35',
 601         '72.0.3626.108',
 602         '74.0.3702.2',
 603         '74.0.3699.3',
 604         '74.0.3702.1',
 605         '74.0.3702.0',
 606         '73.0.3683.34',
 607         '72.0.3626.107',
 608         '73.0.3683.33',
 609         '74.0.3701.1',
 610         '74.0.3701.0',
 611         '73.0.3683.32',
 612         '73.0.3683.31',
 613         '72.0.3626.105',
 614         '74.0.3700.1',
 615         '74.0.3700.0',
 616         '73.0.3683.29',
 617         '72.0.3626.103',
 618         '74.0.3699.2',
 619         '74.0.3699.1',
 620         '74.0.3699.0',
 621         '73.0.3683.28',
 622         '72.0.3626.102',
 623         '73.0.3683.27',
 624         '73.0.3683.26',
 625         '74.0.3698.0',
 626         '74.0.3696.2',
 627         '72.0.3626.101',
 628         '73.0.3683.25',
 629         '74.0.3696.1',
 630         '74.0.3696.0',
 631         '74.0.3694.8',
 632         '72.0.3626.100',
 633         '74.0.3694.7',
 634         '74.0.3694.6',
 635         '74.0.3694.5',
 636         '74.0.3694.4',
 637         '72.0.3626.99',
 638         '72.0.3626.98',
 639         '74.0.3694.3',
 640         '73.0.3683.24',
 641         '72.0.3626.97',
 642         '72.0.3626.96',
 643         '72.0.3626.95',
 644         '73.0.3683.23',
 645         '72.0.3626.94',
 646         '73.0.3683.22',
 647         '73.0.3683.21',
 648         '72.0.3626.93',
 649         '74.0.3694.2',
 650         '72.0.3626.92',
 651         '74.0.3694.1',
 652         '74.0.3694.0',
 653         '74.0.3693.6',
 654         '73.0.3683.20',
 655         '72.0.3626.91',
 656         '74.0.3693.5',
 657         '74.0.3693.4',
 658         '74.0.3693.3',
 659         '74.0.3693.2',
 660         '73.0.3683.19',
 661         '74.0.3693.1',
 662         '74.0.3693.0',
 663         '73.0.3683.18',
 664         '72.0.3626.90',
 665         '74.0.3692.1',
 666         '74.0.3692.0',
 667         '73.0.3683.17',
 668         '72.0.3626.89',
 669         '74.0.3687.3',
 670         '74.0.3691.1',
 671         '74.0.3691.0',
 672         '73.0.3683.16',
 673         '72.0.3626.88',
 674         '72.0.3626.87',
 675         '73.0.3683.15',
 676         '74.0.3690.1',
 677         '74.0.3690.0',
 678         '73.0.3683.14',
 679         '72.0.3626.86',
 680         '73.0.3683.13',
 681         '73.0.3683.12',
 682         '74.0.3689.1',
 683         '74.0.3689.0',
 684         '73.0.3683.11',
 685         '72.0.3626.85',
 686         '73.0.3683.10',
 687         '72.0.3626.84',
 688         '73.0.3683.9',
 689         '74.0.3688.1',
 690         '74.0.3688.0',
 691         '73.0.3683.8',
 692         '72.0.3626.83',
 693         '74.0.3687.2',
 694         '74.0.3687.1',
 695         '74.0.3687.0',
 696         '73.0.3683.7',
 697         '72.0.3626.82',
 698         '74.0.3686.4',
 699         '72.0.3626.81',
 700         '74.0.3686.3',
 701         '74.0.3686.2',
 702         '74.0.3686.1',
 703         '74.0.3686.0',
 704         '73.0.3683.6',
 705         '72.0.3626.80',
 706         '74.0.3685.1',
 707         '74.0.3685.0',
 708         '73.0.3683.5',
 709         '72.0.3626.79',
 710         '74.0.3684.1',
 711         '74.0.3684.0',
 712         '73.0.3683.4',
 713         '72.0.3626.78',
 714         '72.0.3626.77',
 715         '73.0.3683.3',
 716         '73.0.3683.2',
 717         '72.0.3626.76',
 718         '73.0.3683.1',
 719         '73.0.3683.0',
 720         '72.0.3626.75',
 721         '71.0.3578.141',
 722         '73.0.3682.1',
 723         '73.0.3682.0',
 724         '72.0.3626.74',
 725         '71.0.3578.140',
 726         '73.0.3681.4',
 727         '73.0.3681.3',
 728         '73.0.3681.2',
 729         '73.0.3681.1',
 730         '73.0.3681.0',
 731         '72.0.3626.73',
 732         '71.0.3578.139',
 733         '72.0.3626.72',
 734         '72.0.3626.71',
 735         '73.0.3680.1',
 736         '73.0.3680.0',
 737         '72.0.3626.70',
 738         '71.0.3578.138',
 739         '73.0.3678.2',
 740         '73.0.3679.1',
 741         '73.0.3679.0',
 742         '72.0.3626.69',
 743         '71.0.3578.137',
 744         '73.0.3678.1',
 745         '73.0.3678.0',
 746         '71.0.3578.136',
 747         '73.0.3677.1',
 748         '73.0.3677.0',
 749         '72.0.3626.68',
 750         '72.0.3626.67',
 751         '71.0.3578.135',
 752         '73.0.3676.1',
 753         '73.0.3676.0',
 754         '73.0.3674.2',
 755         '72.0.3626.66',
 756         '71.0.3578.134',
 757         '73.0.3674.1',
 758         '73.0.3674.0',
 759         '72.0.3626.65',
 760         '71.0.3578.133',
 761         '73.0.3673.2',
 762         '73.0.3673.1',
 763         '73.0.3673.0',
 764         '72.0.3626.64',
 765         '71.0.3578.132',
 766         '72.0.3626.63',
 767         '72.0.3626.62',
 768         '72.0.3626.61',
 769         '72.0.3626.60',
 770         '73.0.3672.1',
 771         '73.0.3672.0',
 772         '72.0.3626.59',
 773         '71.0.3578.131',
 774         '73.0.3671.3',
 775         '73.0.3671.2',
 776         '73.0.3671.1',
 777         '73.0.3671.0',
 778         '72.0.3626.58',
 779         '71.0.3578.130',
 780         '73.0.3670.1',
 781         '73.0.3670.0',
 782         '72.0.3626.57',
 783         '71.0.3578.129',
 784         '73.0.3669.1',
 785         '73.0.3669.0',
 786         '72.0.3626.56',
 787         '71.0.3578.128',
 788         '73.0.3668.2',
 789         '73.0.3668.1',
 790         '73.0.3668.0',
 791         '72.0.3626.55',
 792         '71.0.3578.127',
 793         '73.0.3667.2',
 794         '73.0.3667.1',
 795         '73.0.3667.0',
 796         '72.0.3626.54',
 797         '71.0.3578.126',
 798         '73.0.3666.1',
 799         '73.0.3666.0',
 800         '72.0.3626.53',
 801         '71.0.3578.125',
 802         '73.0.3665.4',
 803         '73.0.3665.3',
 804         '72.0.3626.52',
 805         '73.0.3665.2',
 806         '73.0.3664.4',
 807         '73.0.3665.1',
 808         '73.0.3665.0',
 809         '72.0.3626.51',
 810         '71.0.3578.124',
 811         '72.0.3626.50',
 812         '73.0.3664.3',
 813         '73.0.3664.2',
 814         '73.0.3664.1',
 815         '73.0.3664.0',
 816         '73.0.3663.2',
 817         '72.0.3626.49',
 818         '71.0.3578.123',
 819         '73.0.3663.1',
 820         '73.0.3663.0',
 821         '72.0.3626.48',
 822         '71.0.3578.122',
 823         '73.0.3662.1',
 824         '73.0.3662.0',
 825         '72.0.3626.47',
 826         '71.0.3578.121',
 827         '73.0.3661.1',
 828         '72.0.3626.46',
 829         '73.0.3661.0',
 830         '72.0.3626.45',
 831         '71.0.3578.120',
 832         '73.0.3660.2',
 833         '73.0.3660.1',
 834         '73.0.3660.0',
 835         '72.0.3626.44',
 836         '71.0.3578.119',
 837         '73.0.3659.1',
 838         '73.0.3659.0',
 839         '72.0.3626.43',
 840         '71.0.3578.118',
 841         '73.0.3658.1',
 842         '73.0.3658.0',
 843         '72.0.3626.42',
 844         '71.0.3578.117',
 845         '73.0.3657.1',
 846         '73.0.3657.0',
 847         '72.0.3626.41',
 848         '71.0.3578.116',
 849         '73.0.3656.1',
 850         '73.0.3656.0',
 851         '72.0.3626.40',
 852         '71.0.3578.115',
 853         '73.0.3655.1',
 854         '73.0.3655.0',
 855         '72.0.3626.39',
 856         '71.0.3578.114',
 857         '73.0.3654.1',
 858         '73.0.3654.0',
 859         '72.0.3626.38',
 860         '71.0.3578.113',
 861         '73.0.3653.1',
 862         '73.0.3653.0',
 863         '72.0.3626.37',
 864         '71.0.3578.112',
 865         '73.0.3652.1',
 866         '73.0.3652.0',
 867         '72.0.3626.36',
 868         '71.0.3578.111',
 869         '73.0.3651.1',
 870         '73.0.3651.0',
 871         '72.0.3626.35',
 872         '71.0.3578.110',
 873         '73.0.3650.1',
 874         '73.0.3650.0',
 875         '72.0.3626.34',
 876         '71.0.3578.109',
 877         '73.0.3649.1',
 878         '73.0.3649.0',
 879         '72.0.3626.33',
 880         '71.0.3578.108',
 881         '73.0.3648.2',
 882         '73.0.3648.1',
 883         '73.0.3648.0',
 884         '72.0.3626.32',
 885         '71.0.3578.107',
 886         '73.0.3647.2',
 887         '73.0.3647.1',
 888         '73.0.3647.0',
 889         '72.0.3626.31',
 890         '71.0.3578.106',
 891         '73.0.3635.3',
 892         '73.0.3646.2',
 893         '73.0.3646.1',
 894         '73.0.3646.0',
 895         '72.0.3626.30',
 896         '71.0.3578.105',
 897         '72.0.3626.29',
 898         '73.0.3645.2',
 899         '73.0.3645.1',
 900         '73.0.3645.0',
 901         '72.0.3626.28',
 902         '71.0.3578.104',
 903         '72.0.3626.27',
 904         '72.0.3626.26',
 905         '72.0.3626.25',
 906         '72.0.3626.24',
 907         '73.0.3644.0',
 908         '73.0.3643.2',
 909         '72.0.3626.23',
 910         '71.0.3578.103',
 911         '73.0.3643.1',
 912         '73.0.3643.0',
 913         '72.0.3626.22',
 914         '71.0.3578.102',
 915         '73.0.3642.1',
 916         '73.0.3642.0',
 917         '72.0.3626.21',
 918         '71.0.3578.101',
 919         '73.0.3641.1',
 920         '73.0.3641.0',
 921         '72.0.3626.20',
 922         '71.0.3578.100',
 923         '72.0.3626.19',
 924         '73.0.3640.1',
 925         '73.0.3640.0',
 926         '72.0.3626.18',
 927         '73.0.3639.1',
 928         '71.0.3578.99',
 929         '73.0.3639.0',
 930         '72.0.3626.17',
 931         '73.0.3638.2',
 932         '72.0.3626.16',
 933         '73.0.3638.1',
 934         '73.0.3638.0',
 935         '72.0.3626.15',
 936         '71.0.3578.98',
 937         '73.0.3635.2',
 938         '71.0.3578.97',
 939         '73.0.3637.1',
 940         '73.0.3637.0',
 941         '72.0.3626.14',
 942         '71.0.3578.96',
 943         '71.0.3578.95',
 944         '72.0.3626.13',
 945         '71.0.3578.94',
 946         '73.0.3636.2',
 947         '71.0.3578.93',
 948         '73.0.3636.1',
 949         '73.0.3636.0',
 950         '72.0.3626.12',
 951         '71.0.3578.92',
 952         '73.0.3635.1',
 953         '73.0.3635.0',
 954         '72.0.3626.11',
 955         '71.0.3578.91',
 956         '73.0.3634.2',
 957         '73.0.3634.1',
 958         '73.0.3634.0',
 959         '72.0.3626.10',
 960         '71.0.3578.90',
 961         '71.0.3578.89',
 962         '73.0.3633.2',
 963         '73.0.3633.1',
 964         '73.0.3633.0',
 965         '72.0.3610.4',
 966         '72.0.3626.9',
 967         '71.0.3578.88',
 968         '73.0.3632.5',
 969         '73.0.3632.4',
 970         '73.0.3632.3',
 971         '73.0.3632.2',
 972         '73.0.3632.1',
 973         '73.0.3632.0',
 974         '72.0.3626.8',
 975         '71.0.3578.87',
 976         '73.0.3631.2',
 977         '73.0.3631.1',
 978         '73.0.3631.0',
 979         '72.0.3626.7',
 980         '71.0.3578.86',
 981         '72.0.3626.6',
 982         '73.0.3630.1',
 983         '73.0.3630.0',
 984         '72.0.3626.5',
 985         '71.0.3578.85',
 986         '72.0.3626.4',
 987         '73.0.3628.3',
 988         '73.0.3628.2',
 989         '73.0.3629.1',
 990         '73.0.3629.0',
 991         '72.0.3626.3',
 992         '71.0.3578.84',
 993         '73.0.3628.1',
 994         '73.0.3628.0',
 995         '71.0.3578.83',
 996         '73.0.3627.1',
 997         '73.0.3627.0',
 998         '72.0.3626.2',
 999         '71.0.3578.82',
1000         '71.0.3578.81',
1001         '71.0.3578.80',
1002         '72.0.3626.1',
1003         '72.0.3626.0',
1004         '71.0.3578.79',
1005         '70.0.3538.124',
1006         '71.0.3578.78',
1007         '72.0.3623.4',
1008         '72.0.3625.2',
1009         '72.0.3625.1',
1010         '72.0.3625.0',
1011         '71.0.3578.77',
1012         '70.0.3538.123',
1013         '72.0.3624.4',
1014         '72.0.3624.3',
1015         '72.0.3624.2',
1016         '71.0.3578.76',
1017         '72.0.3624.1',
1018         '72.0.3624.0',
1019         '72.0.3623.3',
1020         '71.0.3578.75',
1021         '70.0.3538.122',
1022         '71.0.3578.74',
1023         '72.0.3623.2',
1024         '72.0.3610.3',
1025         '72.0.3623.1',
1026         '72.0.3623.0',
1027         '72.0.3622.3',
1028         '72.0.3622.2',
1029         '71.0.3578.73',
1030         '70.0.3538.121',
1031         '72.0.3622.1',
1032         '72.0.3622.0',
1033         '71.0.3578.72',
1034         '70.0.3538.120',
1035         '72.0.3621.1',
1036         '72.0.3621.0',
1037         '71.0.3578.71',
1038         '70.0.3538.119',
1039         '72.0.3620.1',
1040         '72.0.3620.0',
1041         '71.0.3578.70',
1042         '70.0.3538.118',
1043         '71.0.3578.69',
1044         '72.0.3619.1',
1045         '72.0.3619.0',
1046         '71.0.3578.68',
1047         '70.0.3538.117',
1048         '71.0.3578.67',
1049         '72.0.3618.1',
1050         '72.0.3618.0',
1051         '71.0.3578.66',
1052         '70.0.3538.116',
1053         '72.0.3617.1',
1054         '72.0.3617.0',
1055         '71.0.3578.65',
1056         '70.0.3538.115',
1057         '72.0.3602.3',
1058         '71.0.3578.64',
1059         '72.0.3616.1',
1060         '72.0.3616.0',
1061         '71.0.3578.63',
1062         '70.0.3538.114',
1063         '71.0.3578.62',
1064         '72.0.3615.1',
1065         '72.0.3615.0',
1066         '71.0.3578.61',
1067         '70.0.3538.113',
1068         '72.0.3614.1',
1069         '72.0.3614.0',
1070         '71.0.3578.60',
1071         '70.0.3538.112',
1072         '72.0.3613.1',
1073         '72.0.3613.0',
1074         '71.0.3578.59',
1075         '70.0.3538.111',
1076         '72.0.3612.2',
1077         '72.0.3612.1',
1078         '72.0.3612.0',
1079         '70.0.3538.110',
1080         '71.0.3578.58',
1081         '70.0.3538.109',
1082         '72.0.3611.2',
1083         '72.0.3611.1',
1084         '72.0.3611.0',
1085         '71.0.3578.57',
1086         '70.0.3538.108',
1087         '72.0.3610.2',
1088         '71.0.3578.56',
1089         '71.0.3578.55',
1090         '72.0.3610.1',
1091         '72.0.3610.0',
1092         '71.0.3578.54',
1093         '70.0.3538.107',
1094         '71.0.3578.53',
1095         '72.0.3609.3',
1096         '71.0.3578.52',
1097         '72.0.3609.2',
1098         '71.0.3578.51',
1099         '72.0.3608.5',
1100         '72.0.3609.1',
1101         '72.0.3609.0',
1102         '71.0.3578.50',
1103         '70.0.3538.106',
1104         '72.0.3608.4',
1105         '72.0.3608.3',
1106         '72.0.3608.2',
1107         '71.0.3578.49',
1108         '72.0.3608.1',
1109         '72.0.3608.0',
1110         '70.0.3538.105',
1111         '71.0.3578.48',
1112         '72.0.3607.1',
1113         '72.0.3607.0',
1114         '71.0.3578.47',
1115         '70.0.3538.104',
1116         '72.0.3606.2',
1117         '72.0.3606.1',
1118         '72.0.3606.0',
1119         '71.0.3578.46',
1120         '70.0.3538.103',
1121         '70.0.3538.102',
1122         '72.0.3605.3',
1123         '72.0.3605.2',
1124         '72.0.3605.1',
1125         '72.0.3605.0',
1126         '71.0.3578.45',
1127         '70.0.3538.101',
1128         '71.0.3578.44',
1129         '71.0.3578.43',
1130         '70.0.3538.100',
1131         '70.0.3538.99',
1132         '71.0.3578.42',
1133         '72.0.3604.1',
1134         '72.0.3604.0',
1135         '71.0.3578.41',
1136         '70.0.3538.98',
1137         '71.0.3578.40',
1138         '72.0.3603.2',
1139         '72.0.3603.1',
1140         '72.0.3603.0',
1141         '71.0.3578.39',
1142         '70.0.3538.97',
1143         '72.0.3602.2',
1144         '71.0.3578.38',
1145         '71.0.3578.37',
1146         '72.0.3602.1',
1147         '72.0.3602.0',
1148         '71.0.3578.36',
1149         '70.0.3538.96',
1150         '72.0.3601.1',
1151         '72.0.3601.0',
1152         '71.0.3578.35',
1153         '70.0.3538.95',
1154         '72.0.3600.1',
1155         '72.0.3600.0',
1156         '71.0.3578.34',
1157         '70.0.3538.94',
1158         '72.0.3599.3',
1159         '72.0.3599.2',
1160         '72.0.3599.1',
1161         '72.0.3599.0',
1162         '71.0.3578.33',
1163         '70.0.3538.93',
1164         '72.0.3598.1',
1165         '72.0.3598.0',
1166         '71.0.3578.32',
1167         '70.0.3538.87',
1168         '72.0.3597.1',
1169         '72.0.3597.0',
1170         '72.0.3596.2',
1171         '71.0.3578.31',
1172         '70.0.3538.86',
1173         '71.0.3578.30',
1174         '71.0.3578.29',
1175         '72.0.3596.1',
1176         '72.0.3596.0',
1177         '71.0.3578.28',
1178         '70.0.3538.85',
1179         '72.0.3595.2',
1180         '72.0.3591.3',
1181         '72.0.3595.1',
1182         '72.0.3595.0',
1183         '71.0.3578.27',
1184         '70.0.3538.84',
1185         '72.0.3594.1',
1186         '72.0.3594.0',
1187         '71.0.3578.26',
1188         '70.0.3538.83',
1189         '72.0.3593.2',
1190         '72.0.3593.1',
1191         '72.0.3593.0',
1192         '71.0.3578.25',
1193         '70.0.3538.82',
1194         '72.0.3589.3',
1195         '72.0.3592.2',
1196         '72.0.3592.1',
1197         '72.0.3592.0',
1198         '71.0.3578.24',
1199         '72.0.3589.2',
1200         '70.0.3538.81',
1201         '70.0.3538.80',
1202         '72.0.3591.2',
1203         '72.0.3591.1',
1204         '72.0.3591.0',
1205         '71.0.3578.23',
1206         '70.0.3538.79',
1207         '71.0.3578.22',
1208         '72.0.3590.1',
1209         '72.0.3590.0',
1210         '71.0.3578.21',
1211         '70.0.3538.78',
1212         '70.0.3538.77',
1213         '72.0.3589.1',
1214         '72.0.3589.0',
1215         '71.0.3578.20',
1216         '70.0.3538.76',
1217         '71.0.3578.19',
1218         '70.0.3538.75',
1219         '72.0.3588.1',
1220         '72.0.3588.0',
1221         '71.0.3578.18',
1222         '70.0.3538.74',
1223         '72.0.3586.2',
1224         '72.0.3587.0',
1225         '71.0.3578.17',
1226         '70.0.3538.73',
1227         '72.0.3586.1',
1228         '72.0.3586.0',
1229         '71.0.3578.16',
1230         '70.0.3538.72',
1231         '72.0.3585.1',
1232         '72.0.3585.0',
1233         '71.0.3578.15',
1234         '70.0.3538.71',
1235         '71.0.3578.14',
1236         '72.0.3584.1',
1237         '72.0.3584.0',
1238         '71.0.3578.13',
1239         '70.0.3538.70',
1240         '72.0.3583.2',
1241         '71.0.3578.12',
1242         '72.0.3583.1',
1243         '72.0.3583.0',
1244         '71.0.3578.11',
1245         '70.0.3538.69',
1246         '71.0.3578.10',
1247         '72.0.3582.0',
1248         '72.0.3581.4',
1249         '71.0.3578.9',
1250         '70.0.3538.67',
1251         '72.0.3581.3',
1252         '72.0.3581.2',
1253         '72.0.3581.1',
1254         '72.0.3581.0',
1255         '71.0.3578.8',
1256         '70.0.3538.66',
1257         '72.0.3580.1',
1258         '72.0.3580.0',
1259         '71.0.3578.7',
1260         '70.0.3538.65',
1261         '71.0.3578.6',
1262         '72.0.3579.1',
1263         '72.0.3579.0',
1264         '71.0.3578.5',
1265         '70.0.3538.64',
1266         '71.0.3578.4',
1267         '71.0.3578.3',
1268         '71.0.3578.2',
1269         '71.0.3578.1',
1270         '71.0.3578.0',
1271         '70.0.3538.63',
1272         '69.0.3497.128',
1273         '70.0.3538.62',
1274         '70.0.3538.61',
1275         '70.0.3538.60',
1276         '70.0.3538.59',
1277         '71.0.3577.1',
1278         '71.0.3577.0',
1279         '70.0.3538.58',
1280         '69.0.3497.127',
1281         '71.0.3576.2',
1282         '71.0.3576.1',
1283         '71.0.3576.0',
1284         '70.0.3538.57',
1285         '70.0.3538.56',
1286         '71.0.3575.2',
1287         '70.0.3538.55',
1288         '69.0.3497.126',
1289         '70.0.3538.54',
1290         '71.0.3575.1',
1291         '71.0.3575.0',
1292         '71.0.3574.1',
1293         '71.0.3574.0',
1294         '70.0.3538.53',
1295         '69.0.3497.125',
1296         '70.0.3538.52',
1297         '71.0.3573.1',
1298         '71.0.3573.0',
1299         '70.0.3538.51',
1300         '69.0.3497.124',
1301         '71.0.3572.1',
1302         '71.0.3572.0',
1303         '70.0.3538.50',
1304         '69.0.3497.123',
1305         '71.0.3571.2',
1306         '70.0.3538.49',
1307         '69.0.3497.122',
1308         '71.0.3571.1',
1309         '71.0.3571.0',
1310         '70.0.3538.48',
1311         '69.0.3497.121',
1312         '71.0.3570.1',
1313         '71.0.3570.0',
1314         '70.0.3538.47',
1315         '69.0.3497.120',
1316         '71.0.3568.2',
1317         '71.0.3569.1',
1318         '71.0.3569.0',
1319         '70.0.3538.46',
1320         '69.0.3497.119',
1321         '70.0.3538.45',
1322         '71.0.3568.1',
1323         '71.0.3568.0',
1324         '70.0.3538.44',
1325         '69.0.3497.118',
1326         '70.0.3538.43',
1327         '70.0.3538.42',
1328         '71.0.3567.1',
1329         '71.0.3567.0',
1330         '70.0.3538.41',
1331         '69.0.3497.117',
1332         '71.0.3566.1',
1333         '71.0.3566.0',
1334         '70.0.3538.40',
1335         '69.0.3497.116',
1336         '71.0.3565.1',
1337         '71.0.3565.0',
1338         '70.0.3538.39',
1339         '69.0.3497.115',
1340         '71.0.3564.1',
1341         '71.0.3564.0',
1342         '70.0.3538.38',
1343         '69.0.3497.114',
1344         '71.0.3563.0',
1345         '71.0.3562.2',
1346         '70.0.3538.37',
1347         '69.0.3497.113',
1348         '70.0.3538.36',
1349         '70.0.3538.35',
1350         '71.0.3562.1',
1351         '71.0.3562.0',
1352         '70.0.3538.34',
1353         '69.0.3497.112',
1354         '70.0.3538.33',
1355         '71.0.3561.1',
1356         '71.0.3561.0',
1357         '70.0.3538.32',
1358         '69.0.3497.111',
1359         '71.0.3559.6',
1360         '71.0.3560.1',
1361         '71.0.3560.0',
1362         '71.0.3559.5',
1363         '71.0.3559.4',
1364         '70.0.3538.31',
1365         '69.0.3497.110',
1366         '71.0.3559.3',
1367         '70.0.3538.30',
1368         '69.0.3497.109',
1369         '71.0.3559.2',
1370         '71.0.3559.1',
1371         '71.0.3559.0',
1372         '70.0.3538.29',
1373         '69.0.3497.108',
1374         '71.0.3558.2',
1375         '71.0.3558.1',
1376         '71.0.3558.0',
1377         '70.0.3538.28',
1378         '69.0.3497.107',
1379         '71.0.3557.2',
1380         '71.0.3557.1',
1381         '71.0.3557.0',
1382         '70.0.3538.27',
1383         '69.0.3497.106',
1384         '71.0.3554.4',
1385         '70.0.3538.26',
1386         '71.0.3556.1',
1387         '71.0.3556.0',
1388         '70.0.3538.25',
1389         '71.0.3554.3',
1390         '69.0.3497.105',
1391         '71.0.3554.2',
1392         '70.0.3538.24',
1393         '69.0.3497.104',
1394         '71.0.3555.2',
1395         '70.0.3538.23',
1396         '71.0.3555.1',
1397         '71.0.3555.0',
1398         '70.0.3538.22',
1399         '69.0.3497.103',
1400         '71.0.3554.1',
1401         '71.0.3554.0',
1402         '70.0.3538.21',
1403         '69.0.3497.102',
1404         '71.0.3553.3',
1405         '70.0.3538.20',
1406         '69.0.3497.101',
1407         '71.0.3553.2',
1408         '69.0.3497.100',
1409         '71.0.3553.1',
1410         '71.0.3553.0',
1411         '70.0.3538.19',
1412         '69.0.3497.99',
1413         '69.0.3497.98',
1414         '69.0.3497.97',
1415         '71.0.3552.6',
1416         '71.0.3552.5',
1417         '71.0.3552.4',
1418         '71.0.3552.3',
1419         '71.0.3552.2',
1420         '71.0.3552.1',
1421         '71.0.3552.0',
1422         '70.0.3538.18',
1423         '69.0.3497.96',
1424         '71.0.3551.3',
1425         '71.0.3551.2',
1426         '71.0.3551.1',
1427         '71.0.3551.0',
1428         '70.0.3538.17',
1429         '69.0.3497.95',
1430         '71.0.3550.3',
1431         '71.0.3550.2',
1432         '71.0.3550.1',
1433         '71.0.3550.0',
1434         '70.0.3538.16',
1435         '69.0.3497.94',
1436         '71.0.3549.1',
1437         '71.0.3549.0',
1438         '70.0.3538.15',
1439         '69.0.3497.93',
1440         '69.0.3497.92',
1441         '71.0.3548.1',
1442         '71.0.3548.0',
1443         '70.0.3538.14',
1444         '69.0.3497.91',
1445         '71.0.3547.1',
1446         '71.0.3547.0',
1447         '70.0.3538.13',
1448         '69.0.3497.90',
1449         '71.0.3546.2',
1450         '69.0.3497.89',
1451         '71.0.3546.1',
1452         '71.0.3546.0',
1453         '70.0.3538.12',
1454         '69.0.3497.88',
1455         '71.0.3545.4',
1456         '71.0.3545.3',
1457         '71.0.3545.2',
1458         '71.0.3545.1',
1459         '71.0.3545.0',
1460         '70.0.3538.11',
1461         '69.0.3497.87',
1462         '71.0.3544.5',
1463         '71.0.3544.4',
1464         '71.0.3544.3',
1465         '71.0.3544.2',
1466         '71.0.3544.1',
1467         '71.0.3544.0',
1468         '69.0.3497.86',
1469         '70.0.3538.10',
1470         '69.0.3497.85',
1471         '70.0.3538.9',
1472         '69.0.3497.84',
1473         '71.0.3543.4',
1474         '70.0.3538.8',
1475         '71.0.3543.3',
1476         '71.0.3543.2',
1477         '71.0.3543.1',
1478         '71.0.3543.0',
1479         '70.0.3538.7',
1480         '69.0.3497.83',
1481         '71.0.3542.2',
1482         '71.0.3542.1',
1483         '71.0.3542.0',
1484         '70.0.3538.6',
1485         '69.0.3497.82',
1486         '69.0.3497.81',
1487         '71.0.3541.1',
1488         '71.0.3541.0',
1489         '70.0.3538.5',
1490         '69.0.3497.80',
1491         '71.0.3540.1',
1492         '71.0.3540.0',
1493         '70.0.3538.4',
1494         '69.0.3497.79',
1495         '70.0.3538.3',
1496         '71.0.3539.1',
1497         '71.0.3539.0',
1498         '69.0.3497.78',
1499         '68.0.3440.134',
1500         '69.0.3497.77',
1501         '70.0.3538.2',
1502         '70.0.3538.1',
1503         '70.0.3538.0',
1504         '69.0.3497.76',
1505         '68.0.3440.133',
1506         '69.0.3497.75',
1507         '70.0.3537.2',
1508         '70.0.3537.1',
1509         '70.0.3537.0',
1510         '69.0.3497.74',
1511         '68.0.3440.132',
1512         '70.0.3536.0',
1513         '70.0.3535.5',
1514         '70.0.3535.4',
1515         '70.0.3535.3',
1516         '69.0.3497.73',
1517         '68.0.3440.131',
1518         '70.0.3532.8',
1519         '70.0.3532.7',
1520         '69.0.3497.72',
1521         '69.0.3497.71',
1522         '70.0.3535.2',
1523         '70.0.3535.1',
1524         '70.0.3535.0',
1525         '69.0.3497.70',
1526         '68.0.3440.130',
1527         '69.0.3497.69',
1528         '68.0.3440.129',
1529         '70.0.3534.4',
1530         '70.0.3534.3',
1531         '70.0.3534.2',
1532         '70.0.3534.1',
1533         '70.0.3534.0',
1534         '69.0.3497.68',
1535         '68.0.3440.128',
1536         '70.0.3533.2',
1537         '70.0.3533.1',
1538         '70.0.3533.0',
1539         '69.0.3497.67',
1540         '68.0.3440.127',
1541         '70.0.3532.6',
1542         '70.0.3532.5',
1543         '70.0.3532.4',
1544         '69.0.3497.66',
1545         '68.0.3440.126',
1546         '70.0.3532.3',
1547         '70.0.3532.2',
1548         '70.0.3532.1',
1549         '69.0.3497.60',
1550         '69.0.3497.65',
1551         '69.0.3497.64',
1552         '70.0.3532.0',
1553         '70.0.3531.0',
1554         '70.0.3530.4',
1555         '70.0.3530.3',
1556         '70.0.3530.2',
1557         '69.0.3497.58',
1558         '68.0.3440.125',
1559         '69.0.3497.57',
1560         '69.0.3497.56',
1561         '69.0.3497.55',
1562         '69.0.3497.54',
1563         '70.0.3530.1',
1564         '70.0.3530.0',
1565         '69.0.3497.53',
1566         '68.0.3440.124',
1567         '69.0.3497.52',
1568         '70.0.3529.3',
1569         '70.0.3529.2',
1570         '70.0.3529.1',
1571         '70.0.3529.0',
1572         '69.0.3497.51',
1573         '70.0.3528.4',
1574         '68.0.3440.123',
1575         '70.0.3528.3',
1576         '70.0.3528.2',
1577         '70.0.3528.1',
1578         '70.0.3528.0',
1579         '69.0.3497.50',
1580         '68.0.3440.122',
1581         '70.0.3527.1',
1582         '70.0.3527.0',
1583         '69.0.3497.49',
1584         '68.0.3440.121',
1585         '70.0.3526.1',
1586         '70.0.3526.0',
1587         '68.0.3440.120',
1588         '69.0.3497.48',
1589         '69.0.3497.47',
1590         '68.0.3440.119',
1591         '68.0.3440.118',
1592         '70.0.3525.5',
1593         '70.0.3525.4',
1594         '70.0.3525.3',
1595         '68.0.3440.117',
1596         '69.0.3497.46',
1597         '70.0.3525.2',
1598         '70.0.3525.1',
1599         '70.0.3525.0',
1600         '69.0.3497.45',
1601         '68.0.3440.116',
1602         '70.0.3524.4',
1603         '70.0.3524.3',
1604         '69.0.3497.44',
1605         '70.0.3524.2',
1606         '70.0.3524.1',
1607         '70.0.3524.0',
1608         '70.0.3523.2',
1609         '69.0.3497.43',
1610         '68.0.3440.115',
1611         '70.0.3505.9',
1612         '69.0.3497.42',
1613         '70.0.3505.8',
1614         '70.0.3523.1',
1615         '70.0.3523.0',
1616         '69.0.3497.41',
1617         '68.0.3440.114',
1618         '70.0.3505.7',
1619         '69.0.3497.40',
1620         '70.0.3522.1',
1621         '70.0.3522.0',
1622         '70.0.3521.2',
1623         '69.0.3497.39',
1624         '68.0.3440.113',
1625         '70.0.3505.6',
1626         '70.0.3521.1',
1627         '70.0.3521.0',
1628         '69.0.3497.38',
1629         '68.0.3440.112',
1630         '70.0.3520.1',
1631         '70.0.3520.0',
1632         '69.0.3497.37',
1633         '68.0.3440.111',
1634         '70.0.3519.3',
1635         '70.0.3519.2',
1636         '70.0.3519.1',
1637         '70.0.3519.0',
1638         '69.0.3497.36',
1639         '68.0.3440.110',
1640         '70.0.3518.1',
1641         '70.0.3518.0',
1642         '69.0.3497.35',
1643         '69.0.3497.34',
1644         '68.0.3440.109',
1645         '70.0.3517.1',
1646         '70.0.3517.0',
1647         '69.0.3497.33',
1648         '68.0.3440.108',
1649         '69.0.3497.32',
1650         '70.0.3516.3',
1651         '70.0.3516.2',
1652         '70.0.3516.1',
1653         '70.0.3516.0',
1654         '69.0.3497.31',
1655         '68.0.3440.107',
1656         '70.0.3515.4',
1657         '68.0.3440.106',
1658         '70.0.3515.3',
1659         '70.0.3515.2',
1660         '70.0.3515.1',
1661         '70.0.3515.0',
1662         '69.0.3497.30',
1663         '68.0.3440.105',
1664         '68.0.3440.104',
1665         '70.0.3514.2',
1666         '70.0.3514.1',
1667         '70.0.3514.0',
1668         '69.0.3497.29',
1669         '68.0.3440.103',
1670         '70.0.3513.1',
1671         '70.0.3513.0',
1672         '69.0.3497.28',
1673     )
1674     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
1677 std_headers = {
1678     'User-Agent': random_user_agent(),
1679     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1680     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1681     'Accept-Encoding': 'gzip, deflate',
1682     'Accept-Language': 'en-us,en;q=0.5',
1683 }
1684
1685
1686 USER_AGENTS = {
1687     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1688 }
1689
1690
1691 NO_DEFAULT = object()
1692
1693 ENGLISH_MONTH_NAMES = [
1694     'January', 'February', 'March', 'April', 'May', 'June',
1695     'July', 'August', 'September', 'October', 'November', 'December']
1696
1697 MONTH_NAMES = {
1698     'en': ENGLISH_MONTH_NAMES,
1699     'fr': [
1700         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1701         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1702 }
1703
1704 KNOWN_EXTENSIONS = (
1705     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1706     'flv', 'f4v', 'f4a', 'f4b',
1707     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1708     'mkv', 'mka', 'mk3d',
1709     'avi', 'divx',
1710     'mov',
1711     'asf', 'wmv', 'wma',
1712     '3gp', '3g2',
1713     'mp3',
1714     'flac',
1715     'ape',
1716     'wav',
1717     'f4f', 'f4m', 'm3u8', 'smil')
1718
1719 REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
1720
1721 # needed for sanitizing filenames in restricted mode
1722 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
1723                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1724                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1725
1726 DATE_FORMATS = (
1727     '%d %B %Y',
1728     '%d %b %Y',
1729     '%B %d %Y',
1730     '%B %dst %Y',
1731     '%B %dnd %Y',
1732     '%B %drd %Y',
1733     '%B %dth %Y',
1734     '%b %d %Y',
1735     '%b %dst %Y',
1736     '%b %dnd %Y',
1737     '%b %drd %Y',
1738     '%b %dth %Y',
1739     '%b %dst %Y %I:%M',
1740     '%b %dnd %Y %I:%M',
1741     '%b %drd %Y %I:%M',
1742     '%b %dth %Y %I:%M',
1743     '%Y %m %d',
1744     '%Y-%m-%d',
1745     '%Y/%m/%d',
1746     '%Y/%m/%d %H:%M',
1747     '%Y/%m/%d %H:%M:%S',
1748     '%Y-%m-%d %H:%M',
1749     '%Y-%m-%d %H:%M:%S',
1750     '%Y-%m-%d %H:%M:%S.%f',
1751     '%d.%m.%Y %H:%M',
1752     '%d.%m.%Y %H.%M',
1753     '%Y-%m-%dT%H:%M:%SZ',
1754     '%Y-%m-%dT%H:%M:%S.%fZ',
1755     '%Y-%m-%dT%H:%M:%S.%f0Z',
1756     '%Y-%m-%dT%H:%M:%S',
1757     '%Y-%m-%dT%H:%M:%S.%f',
1758     '%Y-%m-%dT%H:%M',
1759     '%b %d %Y at %H:%M',
1760     '%b %d %Y at %H:%M:%S',
1761     '%B %d %Y at %H:%M',
1762     '%B %d %Y at %H:%M:%S',
1763 )
1764
1765 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1766 DATE_FORMATS_DAY_FIRST.extend([
1767     '%d-%m-%Y',
1768     '%d.%m.%Y',
1769     '%d.%m.%y',
1770     '%d/%m/%Y',
1771     '%d/%m/%y',
1772     '%d/%m/%Y %H:%M:%S',
1773 ])
1774
1775 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1776 DATE_FORMATS_MONTH_FIRST.extend([
1777     '%m-%d-%Y',
1778     '%m.%d.%Y',
1779     '%m/%d/%Y',
1780     '%m/%d/%y',
1781     '%m/%d/%Y %H:%M:%S',
1782 ])
1783
1784 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
1785 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1786
1787
1788 def preferredencoding():
1789     """Get preferred encoding.
1790
1791     Returns the best encoding scheme for the system, based on
1792     locale.getpreferredencoding() and some further tweaks.
1793     """
1794     try:
1795         pref = locale.getpreferredencoding()
1796         'TEST'.encode(pref)
1797     except Exception:
1798         pref = 'UTF-8'
1799
1800     return pref
1801
1802
1803 def write_json_file(obj, fn):
1804     """ Encode obj as JSON and write it to fn, atomically if possible """
1805
1806     fn = encodeFilename(fn)
1807     if sys.version_info < (3, 0) and sys.platform != 'win32':
1808         encoding = get_filesystem_encoding()
1809         # os.path.basename returns a bytes object, but NamedTemporaryFile
1810         # will fail if the filename contains non ascii characters unless we
1811         # use a unicode object
1812         path_basename = lambda f: os.path.basename(fn).decode(encoding)
1813         # the same for os.path.dirname
1814         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1815     else:
1816         path_basename = os.path.basename
1817         path_dirname = os.path.dirname
1818
1819     args = {
1820         'suffix': '.tmp',
1821         'prefix': path_basename(fn) + '.',
1822         'dir': path_dirname(fn),
1823         'delete': False,
1824     }
1825
1826     # In Python 2.x, json.dump expects a bytestream.
1827     # In Python 3.x, it writes to a character stream
1828     if sys.version_info < (3, 0):
1829         args['mode'] = 'wb'
1830     else:
1831         args.update({
1832             'mode': 'w',
1833             'encoding': 'utf-8',
1834         })
1835
1836     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1837
1838     try:
1839         with tf:
1840             json.dump(obj, tf, default=repr)
1841         if sys.platform == 'win32':
1842             # Need to remove existing file on Windows, else os.rename raises
1843             # WindowsError or FileExistsError.
1844             try:
1845                 os.unlink(fn)
1846             except OSError:
1847                 pass
1848         try:
1849             mask = os.umask(0)
1850             os.umask(mask)
1851             os.chmod(tf.name, 0o666 & ~mask)
1852         except OSError:
1853             pass
1854         os.rename(tf.name, fn)
1855     except Exception:
1856         try:
1857             os.remove(tf.name)
1858         except OSError:
1859             pass
1860         raise
1861
1862
1863 if sys.version_info >= (2, 7):
1864     def find_xpath_attr(node, xpath, key, val=None):
1865         """ Find the xpath xpath[@key=val] """
1866         assert re.match(r'^[a-zA-Z_-]+$', key)
1867         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1868         return node.find(expr)
1869 else:
1870     def find_xpath_attr(node, xpath, key, val=None):
1871         for f in node.findall(compat_xpath(xpath)):
1872             if key not in f.attrib:
1873                 continue
1874             if val is None or f.attrib.get(key) == val:
1875                 return f
1876         return None
1877
1878 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1879 # the namespace parameter
1880
1881
1882 def xpath_with_ns(path, ns_map):
1883     components = [c.split(':') for c in path.split('/')]
1884     replaced = []
1885     for c in components:
1886         if len(c) == 1:
1887             replaced.append(c[0])
1888         else:
1889             ns, tag = c
1890             replaced.append('{%s}%s' % (ns_map[ns], tag))
1891     return '/'.join(replaced)
1892
1893
1894 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1895     def _find_xpath(xpath):
1896         return node.find(compat_xpath(xpath))
1897
1898     if isinstance(xpath, (str, compat_str)):
1899         n = _find_xpath(xpath)
1900     else:
1901         for xp in xpath:
1902             n = _find_xpath(xp)
1903             if n is not None:
1904                 break
1905
1906     if n is None:
1907         if default is not NO_DEFAULT:
1908             return default
1909         elif fatal:
1910             name = xpath if name is None else name
1911             raise ExtractorError('Could not find XML element %s' % name)
1912         else:
1913             return None
1914     return n
1915
1916
1917 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1918     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1919     if n is None or n == default:
1920         return n
1921     if n.text is None:
1922         if default is not NO_DEFAULT:
1923             return default
1924         elif fatal:
1925             name = xpath if name is None else name
1926             raise ExtractorError('Could not find XML element\'s text %s' % name)
1927         else:
1928             return None
1929     return n.text
1930
1931
1932 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1933     n = find_xpath_attr(node, xpath, key)
1934     if n is None:
1935         if default is not NO_DEFAULT:
1936             return default
1937         elif fatal:
1938             name = '%s[@%s]' % (xpath, key) if name is None else name
1939             raise ExtractorError('Could not find XML attribute %s' % name)
1940         else:
1941             return None
1942     return n.attrib[key]
1943
1944
1945 def get_element_by_id(id, html):
1946     """Return the content of the tag with the specified ID in the passed HTML document"""
1947     return get_element_by_attribute('id', id, html)
1948
1949
1950 def get_element_by_class(class_name, html):
1951     """Return the content of the first tag with the specified class in the passed HTML document"""
1952     retval = get_elements_by_class(class_name, html)
1953     return retval[0] if retval else None
1954
1955
1956 def get_element_by_attribute(attribute, value, html, escape_value=True):
1957     retval = get_elements_by_attribute(attribute, value, html, escape_value)
1958     return retval[0] if retval else None
1959
1960
1961 def get_elements_by_class(class_name, html):
1962     """Return the content of all tags with the specified class in the passed HTML document as a list"""
1963     return get_elements_by_attribute(
1964         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1965         html, escape_value=False)
1966
1967
1968 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1969     """Return the content of the tag with the specified attribute in the passed HTML document"""
1970
1971     value = re.escape(value) if escape_value else value
1972
1973     retlist = []
1974     for m in re.finditer(r'''(?xs)
1975         <([a-zA-Z0-9:._-]+)
1976          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1977          \s+%s=['"]?%s['"]?
1978          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1979         \s*>
1980         (?P<content>.*?)
1981         </\1>
1982     ''' % (re.escape(attribute), value), html):
1983         res = m.group('content')
1984
1985         if res.startswith('"') or res.startswith("'"):
1986             res = res[1:-1]
1987
1988         retlist.append(unescapeHTML(res))
1989
1990     return retlist
1991
1992
1993 class HTMLAttributeParser(compat_HTMLParser):
1994     """Trivial HTML parser to gather the attributes for a single element"""
1995
1996     def __init__(self):
1997         self.attrs = {}
1998         compat_HTMLParser.__init__(self)
1999
2000     def handle_starttag(self, tag, attrs):
2001         self.attrs = dict(attrs)
2002
2003
2004 def extract_attributes(html_element):
2005     """Given a string for an HTML element such as
2006     <el
2007          a="foo" B="bar" c="&98;az" d=boz
2008          empty= noval entity="&amp;"
2009          sq='"' dq="'"
2010     >
2011     Decode and return a dictionary of attributes.
2012     {
2013         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
2014         'empty': '', 'noval': None, 'entity': '&',
2015         'sq': '"', 'dq': '\''
2016     }.
2017     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2018     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2019     """
2020     parser = HTMLAttributeParser()
2021     try:
2022         parser.feed(html_element)
2023         parser.close()
2024     # Older Python may throw HTMLParseError in case of malformed HTML
2025     except compat_HTMLParseError:
2026         pass
2027     return parser.attrs
2028
2029
2030 def clean_html(html):
2031     """Clean an HTML snippet into a readable string"""
2032
2033     if html is None:  # Convenience for sanitizing descriptions etc.
2034         return html
2035
2036     # Newline vs <br />
2037     html = html.replace('\n', ' ')
2038     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2039     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2040     # Strip html tags
2041     html = re.sub('<.*?>', '', html)
2042     # Replace html entities
2043     html = unescapeHTML(html)
2044     return html.strip()
2045
2046
2047 def sanitize_open(filename, open_mode):
2048     """Try to open the given filename, and slightly tweak it if this fails.
2049
2050     Attempts to open the given filename. If this fails, it tries to change
2051     the filename slightly, step by step, until it's either able to open it
2052     or it fails and raises a final exception, like the standard open()
2053     function.
2054
2055     It returns the tuple (stream, definitive_file_name).
2056     """
2057     try:
2058         if filename == '-':
2059             if sys.platform == 'win32':
2060                 import msvcrt
2061                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2062             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2063         stream = open(encodeFilename(filename), open_mode)
2064         return (stream, filename)
2065     except (IOError, OSError) as err:
2066         if err.errno in (errno.EACCES,):
2067             raise
2068
2069         # In case of error, try to remove win32 forbidden chars
2070         alt_filename = sanitize_path(filename)
2071         if alt_filename == filename:
2072             raise
2073         else:
2074             # An exception here should be caught in the caller
2075             stream = open(encodeFilename(alt_filename), open_mode)
2076             return (stream, alt_filename)
2077
2078
2079 def timeconvert(timestr):
2080     """Convert RFC 2822 defined time string into system timestamp"""
2081     timestamp = None
2082     timetuple = email.utils.parsedate_tz(timestr)
2083     if timetuple is not None:
2084         timestamp = email.utils.mktime_tz(timetuple)
2085     return timestamp
2086
2087
2088 def sanitize_filename(s, restricted=False, is_id=False):
2089     """Sanitizes a string so it could be used as part of a filename.
2090     If restricted is set, use a stricter subset of allowed characters.
2091     Set is_id if this is not an arbitrary string, but an ID that should be kept
2092     if possible.
2093     """
2094     def replace_insane(char):
2095         if restricted and char in ACCENT_CHARS:
2096             return ACCENT_CHARS[char]
2097         if char == '?' or ord(char) < 32 or ord(char) == 127:
2098             return ''
2099         elif char == '"':
2100             return '' if restricted else '\''
2101         elif char == ':':
2102             return '_-' if restricted else ' -'
2103         elif char in '\\/|*<>':
2104             return '_'
2105         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
2106             return '_'
2107         if restricted and ord(char) > 127:
2108             return '_'
2109         return char
2110
2111     # Handle timestamps
2112     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
2113     result = ''.join(map(replace_insane, s))
2114     if not is_id:
2115         while '__' in result:
2116             result = result.replace('__', '_')
2117         result = result.strip('_')
2118         # Common case of "Foreign band name - English song title"
2119         if restricted and result.startswith('-_'):
2120             result = result[2:]
2121         if result.startswith('-'):
2122             result = '_' + result[len('-'):]
2123         result = result.lstrip('.')
2124         if not result:
2125             result = '_'
2126     return result
2127
2128
2129 def sanitize_path(s, force=False):
2130     """Sanitizes and normalizes path on Windows"""
2131     if sys.platform == 'win32':
2132         force = False
2133         drive_or_unc, _ = os.path.splitdrive(s)
2134         if sys.version_info < (2, 7) and not drive_or_unc:
2135             drive_or_unc, _ = os.path.splitunc(s)
2136     elif force:
2137         drive_or_unc = ''
2138     else:
2139         return s
2140
2141     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2142     if drive_or_unc:
2143         norm_path.pop(0)
2144     sanitized_path = [
2145         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
2146         for path_part in norm_path]
2147     if drive_or_unc:
2148         sanitized_path.insert(0, drive_or_unc + os.path.sep)
2149     elif force and s[0] == os.path.sep:
2150         sanitized_path.insert(0, os.path.sep)
2151     return os.path.join(*sanitized_path)
2152
2153
2154 def sanitize_url(url):
2155     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2156     # the number of unwanted failures due to missing protocol
2157     if url.startswith('//'):
2158         return 'http:%s' % url
2159     # Fix some common typos seen so far
2160     COMMON_TYPOS = (
2161         # https://github.com/ytdl-org/youtube-dl/issues/15649
2162         (r'^httpss://', r'https://'),
2163         # https://bx1.be/lives/direct-tv/
2164         (r'^rmtp([es]?)://', r'rtmp\1://'),
2165     )
2166     for mistake, fixup in COMMON_TYPOS:
2167         if re.match(mistake, url):
2168             return re.sub(mistake, fixup, url)
2169     return escape_url(url)
2170
2171
2172 def sanitized_Request(url, *args, **kwargs):
2173     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2174
2175
2176 def expand_path(s):
2177     """Expand shell variables and ~"""
2178     return os.path.expandvars(compat_expanduser(s))
2179
2180
2181 def orderedSet(iterable):
2182     """ Remove all duplicates from the input iterable """
2183     res = []
2184     for el in iterable:
2185         if el not in res:
2186             res.append(el)
2187     return res
2188
2189
2190 def _htmlentity_transform(entity_with_semicolon):
2191     """Transforms an HTML entity to a character."""
2192     entity = entity_with_semicolon[:-1]
2193
2194     # Known non-numeric HTML entity
2195     if entity in compat_html_entities.name2codepoint:
2196         return compat_chr(compat_html_entities.name2codepoint[entity])
2197
2198     # TODO: HTML5 allows entities without a semicolon. For example,
2199     # '&Eacuteric' should be decoded as 'Éric'.
2200     if entity_with_semicolon in compat_html_entities_html5:
2201         return compat_html_entities_html5[entity_with_semicolon]
2202
2203     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2204     if mobj is not None:
2205         numstr = mobj.group(1)
2206         if numstr.startswith('x'):
2207             base = 16
2208             numstr = '0%s' % numstr
2209         else:
2210             base = 10
2211         # See https://github.com/ytdl-org/youtube-dl/issues/7518
2212         try:
2213             return compat_chr(int(numstr, base))
2214         except ValueError:
2215             pass
2216
2217     # Unknown entity in name, return its literal representation
2218     return '&%s;' % entity
2219
2220
2221 def unescapeHTML(s):
2222     if s is None:
2223         return None
2224     assert type(s) == compat_str
2225
2226     return re.sub(
2227         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2228
2229
2230 def process_communicate_or_kill(p, *args, **kwargs):
2231     try:
2232         return p.communicate(*args, **kwargs)
2233     except BaseException:  # Including KeyboardInterrupt
2234         p.kill()
2235         p.wait()
2236         raise
2237
2238
2239 def get_subprocess_encoding():
2240     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2241         # For subprocess calls, encode with locale encoding
2242         # Refer to http://stackoverflow.com/a/9951851/35070
2243         encoding = preferredencoding()
2244     else:
2245         encoding = sys.getfilesystemencoding()
2246     if encoding is None:
2247         encoding = 'utf-8'
2248     return encoding
2249
2250
2251 def encodeFilename(s, for_subprocess=False):
2252     """
2253     @param s The name of the file
2254     """
2255
2256     assert type(s) == compat_str
2257
2258     # Python 3 has a Unicode API
2259     if sys.version_info >= (3, 0):
2260         return s
2261
2262     # Pass '' directly to use Unicode APIs on Windows 2000 and up
2263     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2264     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2265     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2266         return s
2267
2268     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2269     if sys.platform.startswith('java'):
2270         return s
2271
2272     return s.encode(get_subprocess_encoding(), 'ignore')
2273
2274
2275 def decodeFilename(b, for_subprocess=False):
2276
2277     if sys.version_info >= (3, 0):
2278         return b
2279
2280     if not isinstance(b, bytes):
2281         return b
2282
2283     return b.decode(get_subprocess_encoding(), 'ignore')
2284
2285
2286 def encodeArgument(s):
2287     if not isinstance(s, compat_str):
2288         # Legacy code that uses byte strings
2289         # Uncomment the following line after fixing all post processors
2290         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
2291         s = s.decode('ascii')
2292     return encodeFilename(s, True)
2293
2294
2295 def decodeArgument(b):
2296     return decodeFilename(b, True)
2297
2298
2299 def decodeOption(optval):
2300     if optval is None:
2301         return optval
2302     if isinstance(optval, bytes):
2303         optval = optval.decode(preferredencoding())
2304
2305     assert isinstance(optval, compat_str)
2306     return optval
2307
2308
2309 def formatSeconds(secs, delim=':'):
2310     if secs > 3600:
2311         return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
2312     elif secs > 60:
2313         return '%d%s%02d' % (secs // 60, delim, secs % 60)
2314     else:
2315         return '%d' % secs
2316
2317
2318 def make_HTTPS_handler(params, **kwargs):
2319     opts_no_check_certificate = params.get('nocheckcertificate', False)
2320     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
2321         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
2322         if opts_no_check_certificate:
2323             context.check_hostname = False
2324             context.verify_mode = ssl.CERT_NONE
2325         try:
2326             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2327         except TypeError:
2328             # Python 2.7.8
2329             # (create_default_context present but HTTPSHandler has no context=)
2330             pass
2331
2332     if sys.version_info < (3, 2):
2333         return YoutubeDLHTTPSHandler(params, **kwargs)
2334     else:  # Python < 3.4
2335         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
2336         context.verify_mode = (ssl.CERT_NONE
2337                                if opts_no_check_certificate
2338                                else ssl.CERT_REQUIRED)
2339         context.set_default_verify_paths()
2340         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2341
2342
2343 def bug_reports_message(before=';'):
2344     if ytdl_is_updateable():
2345         update_cmd = 'type  yt-dlp -U  to update'
2346     else:
2347         update_cmd = 'see  https://github.com/yt-dlp/yt-dlp  on how to update'
2348     msg = 'please report this issue on  https://github.com/yt-dlp/yt-dlp .'
2349     msg += ' Make sure you are using the latest version; %s.' % update_cmd
2350     msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
2351
2352     before = before.rstrip()
2353     if not before or before.endswith(('.', '!', '?')):
2354         msg = msg[0].title() + msg[1:]
2355
2356     return (before + ' ' if before else '') + msg
2357
2358
2359 class YoutubeDLError(Exception):
2360     """Base exception for YoutubeDL errors."""
2361     pass
2362
2363
2364 network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2365 if hasattr(ssl, 'CertificateError'):
2366     network_exceptions.append(ssl.CertificateError)
2367 network_exceptions = tuple(network_exceptions)
2368
2369
2370 class ExtractorError(YoutubeDLError):
2371     """Error during info extraction."""
2372
2373     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
2374         """ tb, if given, is the original traceback (so that it can be printed out).
2375         If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
2376         """
2377
2378         if sys.exc_info()[0] in network_exceptions:
2379             expected = True
2380         if video_id is not None:
2381             msg = video_id + ': ' + msg
2382         if cause:
2383             msg += ' (caused by %r)' % cause
2384         if not expected:
2385             msg += bug_reports_message()
2386         super(ExtractorError, self).__init__(msg)
2387
2388         self.traceback = tb
2389         self.exc_info = sys.exc_info()  # preserve original exception
2390         self.cause = cause
2391         self.video_id = video_id
2392
2393     def format_traceback(self):
2394         if self.traceback is None:
2395             return None
2396         return ''.join(traceback.format_tb(self.traceback))
2397
2398
2399 class UnsupportedError(ExtractorError):
2400     def __init__(self, url):
2401         super(UnsupportedError, self).__init__(
2402             'Unsupported URL: %s' % url, expected=True)
2403         self.url = url
2404
2405
2406 class RegexNotFoundError(ExtractorError):
2407     """Error when a regex didn't match"""
2408     pass
2409
2410
2411 class GeoRestrictedError(ExtractorError):
2412     """Geographic restriction Error exception.
2413
2414     This exception may be thrown when a video is not available from your
2415     geographic location due to geographic restrictions imposed by a website.
2416     """
2417
2418     def __init__(self, msg, countries=None):
2419         super(GeoRestrictedError, self).__init__(msg, expected=True)
2420         self.msg = msg
2421         self.countries = countries
2422
2423
2424 class DownloadError(YoutubeDLError):
2425     """Download Error exception.
2426
2427     This exception may be thrown by FileDownloader objects if they are not
2428     configured to continue on errors. They will contain the appropriate
2429     error message.
2430     """
2431
2432     def __init__(self, msg, exc_info=None):
2433         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2434         super(DownloadError, self).__init__(msg)
2435         self.exc_info = exc_info
2436
2437
2438 class EntryNotInPlaylist(YoutubeDLError):
2439     """Entry not in playlist exception.
2440
2441     This exception will be thrown by YoutubeDL when a requested entry
2442     is not found in the playlist info_dict
2443     """
2444     pass
2445
2446
2447 class SameFileError(YoutubeDLError):
2448     """Same File exception.
2449
2450     This exception will be thrown by FileDownloader objects if they detect
2451     multiple files would have to be downloaded to the same file on disk.
2452     """
2453     pass
2454
2455
2456 class PostProcessingError(YoutubeDLError):
2457     """Post Processing exception.
2458
2459     This exception may be raised by PostProcessor's .run() method to
2460     indicate an error in the postprocessing task.
2461     """
2462
2463     def __init__(self, msg):
2464         super(PostProcessingError, self).__init__(msg)
2465         self.msg = msg
2466
2467
2468 class ExistingVideoReached(YoutubeDLError):
2469     """ --max-downloads limit has been reached. """
2470     pass
2471
2472
2473 class RejectedVideoReached(YoutubeDLError):
2474     """ --max-downloads limit has been reached. """
2475     pass
2476
2477
2478 class MaxDownloadsReached(YoutubeDLError):
2479     """ --max-downloads limit has been reached. """
2480     pass
2481
2482
2483 class UnavailableVideoError(YoutubeDLError):
2484     """Unavailable Format exception.
2485
2486     This exception will be thrown when a video is requested
2487     in a format that is not available for that video.
2488     """
2489     pass
2490
2491
2492 class ContentTooShortError(YoutubeDLError):
2493     """Content Too Short exception.
2494
2495     This exception may be raised by FileDownloader objects when a file they
2496     download is too small for what the server announced first, indicating
2497     the connection was probably interrupted.
2498     """
2499
2500     def __init__(self, downloaded, expected):
2501         super(ContentTooShortError, self).__init__(
2502             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2503         )
2504         # Both in bytes
2505         self.downloaded = downloaded
2506         self.expected = expected
2507
2508
2509 class XAttrMetadataError(YoutubeDLError):
2510     def __init__(self, code=None, msg='Unknown error'):
2511         super(XAttrMetadataError, self).__init__(msg)
2512         self.code = code
2513         self.msg = msg
2514
2515         # Parsing code and msg
2516         if (self.code in (errno.ENOSPC, errno.EDQUOT)
2517                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
2518             self.reason = 'NO_SPACE'
2519         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2520             self.reason = 'VALUE_TOO_LONG'
2521         else:
2522             self.reason = 'NOT_SUPPORTED'
2523
2524
2525 class XAttrUnavailableError(YoutubeDLError):
2526     pass
2527
2528
2529 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
2530     # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2531     # expected HTTP responses to meet HTTP/1.0 or later (see also
2532     # https://github.com/ytdl-org/youtube-dl/issues/6727)
2533     if sys.version_info < (3, 0):
2534         kwargs['strict'] = True
2535     hc = http_class(*args, **compat_kwargs(kwargs))
2536     source_address = ydl_handler._params.get('source_address')
2537
2538     if source_address is not None:
2539         # This is to workaround _create_connection() from socket where it will try all
2540         # address data from getaddrinfo() including IPv6. This filters the result from
2541         # getaddrinfo() based on the source_address value.
2542         # This is based on the cpython socket.create_connection() function.
2543         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2544         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2545             host, port = address
2546             err = None
2547             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
2548             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2549             ip_addrs = [addr for addr in addrs if addr[0] == af]
2550             if addrs and not ip_addrs:
2551                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2552                 raise socket.error(
2553                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
2554                     % (ip_version, source_address[0]))
2555             for res in ip_addrs:
2556                 af, socktype, proto, canonname, sa = res
2557                 sock = None
2558                 try:
2559                     sock = socket.socket(af, socktype, proto)
2560                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2561                         sock.settimeout(timeout)
2562                     sock.bind(source_address)
2563                     sock.connect(sa)
2564                     err = None  # Explicitly break reference cycle
2565                     return sock
2566                 except socket.error as _:
2567                     err = _
2568                     if sock is not None:
2569                         sock.close()
2570             if err is not None:
2571                 raise err
2572             else:
2573                 raise socket.error('getaddrinfo returns an empty list')
2574         if hasattr(hc, '_create_connection'):
2575             hc._create_connection = _create_connection
2576         sa = (source_address, 0)
2577         if hasattr(hc, 'source_address'):  # Python 2.7+
2578             hc.source_address = sa
2579         else:  # Python 2.6
2580             def _hc_connect(self, *args, **kwargs):
2581                 sock = _create_connection(
2582                     (self.host, self.port), self.timeout, sa)
2583                 if is_https:
2584                     self.sock = ssl.wrap_socket(
2585                         sock, self.key_file, self.cert_file,
2586                         ssl_version=ssl.PROTOCOL_TLSv1)
2587                 else:
2588                     self.sock = sock
2589             hc.connect = functools.partial(_hc_connect, hc)
2590
2591     return hc
2592
2593
2594 def handle_youtubedl_headers(headers):
2595     filtered_headers = headers
2596
2597     if 'Youtubedl-no-compression' in filtered_headers:
2598         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
2599         del filtered_headers['Youtubedl-no-compression']
2600
2601     return filtered_headers
2602
2603
2604 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2605     """Handler for HTTP requests and responses.
2606
2607     This class, when installed with an OpenerDirector, automatically adds
2608     the standard headers to every HTTP request and handles gzipped and
2609     deflated responses from web servers. If compression is to be avoided in
2610     a particular request, the original request in the program code only has
2611     to include the HTTP header "Youtubedl-no-compression", which will be
2612     removed before making the real request.
2613
2614     Part of this code was copied from:
2615
2616     http://techknack.net/python-urllib2-handlers/
2617
2618     Andrew Rowls, the author of that code, agreed to release it to the
2619     public domain.
2620     """
2621
2622     def __init__(self, params, *args, **kwargs):
2623         compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2624         self._params = params
2625
2626     def http_open(self, req):
2627         conn_class = compat_http_client.HTTPConnection
2628
2629         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2630         if socks_proxy:
2631             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2632             del req.headers['Ytdl-socks-proxy']
2633
2634         return self.do_open(functools.partial(
2635             _create_http_connection, self, conn_class, False),
2636             req)
2637
2638     @staticmethod
2639     def deflate(data):
2640         if not data:
2641             return data
2642         try:
2643             return zlib.decompress(data, -zlib.MAX_WBITS)
2644         except zlib.error:
2645             return zlib.decompress(data)
2646
2647     def http_request(self, req):
2648         # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2649         # always respected by websites, some tend to give out URLs with non percent-encoded
2650         # non-ASCII characters (see telemb.py, ard.py [#3412])
2651         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2652         # To work around aforementioned issue we will replace request's original URL with
2653         # percent-encoded one
2654         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2655         # the code of this workaround has been moved here from YoutubeDL.urlopen()
2656         url = req.get_full_url()
2657         url_escaped = escape_url(url)
2658
2659         # Substitute URL if any change after escaping
2660         if url != url_escaped:
2661             req = update_Request(req, url=url_escaped)
2662
2663         for h, v in std_headers.items():
2664             # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2665             # The dict keys are capitalized because of this bug by urllib
2666             if h.capitalize() not in req.headers:
2667                 req.add_header(h, v)
2668
2669         req.headers = handle_youtubedl_headers(req.headers)
2670
2671         if sys.version_info < (2, 7) and '#' in req.get_full_url():
2672             # Python 2.6 is brain-dead when it comes to fragments
2673             req._Request__original = req._Request__original.partition('#')[0]
2674             req._Request__r_type = req._Request__r_type.partition('#')[0]
2675
2676         return req
2677
2678     def http_response(self, req, resp):
2679         old_resp = resp
2680         # gzip
2681         if resp.headers.get('Content-encoding', '') == 'gzip':
2682             content = resp.read()
2683             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2684             try:
2685                 uncompressed = io.BytesIO(gz.read())
2686             except IOError as original_ioerror:
2687                 # There may be junk add the end of the file
2688                 # See http://stackoverflow.com/q/4928560/35070 for details
2689                 for i in range(1, 1024):
2690                     try:
2691                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2692                         uncompressed = io.BytesIO(gz.read())
2693                     except IOError:
2694                         continue
2695                     break
2696                 else:
2697                     raise original_ioerror
2698             resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2699             resp.msg = old_resp.msg
2700             del resp.headers['Content-encoding']
2701         # deflate
2702         if resp.headers.get('Content-encoding', '') == 'deflate':
2703             gz = io.BytesIO(self.deflate(resp.read()))
2704             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2705             resp.msg = old_resp.msg
2706             del resp.headers['Content-encoding']
2707         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2708         # https://github.com/ytdl-org/youtube-dl/issues/6457).
2709         if 300 <= resp.code < 400:
2710             location = resp.headers.get('Location')
2711             if location:
2712                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2713                 if sys.version_info >= (3, 0):
2714                     location = location.encode('iso-8859-1').decode('utf-8')
2715                 else:
2716                     location = location.decode('utf-8')
2717                 location_escaped = escape_url(location)
2718                 if location != location_escaped:
2719                     del resp.headers['Location']
2720                     if sys.version_info < (3, 0):
2721                         location_escaped = location_escaped.encode('utf-8')
2722                     resp.headers['Location'] = location_escaped
2723         return resp
2724
2725     https_request = http_request
2726     https_response = http_response
2727
2728
2729 def make_socks_conn_class(base_class, socks_proxy):
2730     assert issubclass(base_class, (
2731         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2732
2733     url_components = compat_urlparse.urlparse(socks_proxy)
2734     if url_components.scheme.lower() == 'socks5':
2735         socks_type = ProxyType.SOCKS5
2736     elif url_components.scheme.lower() in ('socks', 'socks4'):
2737         socks_type = ProxyType.SOCKS4
2738     elif url_components.scheme.lower() == 'socks4a':
2739         socks_type = ProxyType.SOCKS4A
2740
2741     def unquote_if_non_empty(s):
2742         if not s:
2743             return s
2744         return compat_urllib_parse_unquote_plus(s)
2745
2746     proxy_args = (
2747         socks_type,
2748         url_components.hostname, url_components.port or 1080,
2749         True,  # Remote DNS
2750         unquote_if_non_empty(url_components.username),
2751         unquote_if_non_empty(url_components.password),
2752     )
2753
2754     class SocksConnection(base_class):
2755         def connect(self):
2756             self.sock = sockssocket()
2757             self.sock.setproxy(*proxy_args)
2758             if type(self.timeout) in (int, float):
2759                 self.sock.settimeout(self.timeout)
2760             self.sock.connect((self.host, self.port))
2761
2762             if isinstance(self, compat_http_client.HTTPSConnection):
2763                 if hasattr(self, '_context'):  # Python > 2.6
2764                     self.sock = self._context.wrap_socket(
2765                         self.sock, server_hostname=self.host)
2766                 else:
2767                     self.sock = ssl.wrap_socket(self.sock)
2768
2769     return SocksConnection
2770
2771
2772 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2773     def __init__(self, params, https_conn_class=None, *args, **kwargs):
2774         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2775         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2776         self._params = params
2777
2778     def https_open(self, req):
2779         kwargs = {}
2780         conn_class = self._https_conn_class
2781
2782         if hasattr(self, '_context'):  # python > 2.6
2783             kwargs['context'] = self._context
2784         if hasattr(self, '_check_hostname'):  # python 3.x
2785             kwargs['check_hostname'] = self._check_hostname
2786
2787         socks_proxy = req.headers.get('Ytdl-socks-proxy')
2788         if socks_proxy:
2789             conn_class = make_socks_conn_class(conn_class, socks_proxy)
2790             del req.headers['Ytdl-socks-proxy']
2791
2792         return self.do_open(functools.partial(
2793             _create_http_connection, self, conn_class, True),
2794             req, **kwargs)
2795
2796
2797 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
2798     """
2799     See [1] for cookie file format.
2800
2801     1. https://curl.haxx.se/docs/http-cookies.html
2802     """
2803     _HTTPONLY_PREFIX = '#HttpOnly_'
2804     _ENTRY_LEN = 7
2805     _HEADER = '''# Netscape HTTP Cookie File
2806 # This file is generated by yt-dlp.  Do not edit.
2807
2808 '''
2809     _CookieFileEntry = collections.namedtuple(
2810         'CookieFileEntry',
2811         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2812
2813     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
2814         """
2815         Save cookies to a file.
2816
2817         Most of the code is taken from CPython 3.8 and slightly adapted
2818         to support cookie files with UTF-8 in both python 2 and 3.
2819         """
2820         if filename is None:
2821             if self.filename is not None:
2822                 filename = self.filename
2823             else:
2824                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2825
2826         # Store session cookies with `expires` set to 0 instead of an empty
2827         # string
2828         for cookie in self:
2829             if cookie.expires is None:
2830                 cookie.expires = 0
2831
2832         with io.open(filename, 'w', encoding='utf-8') as f:
2833             f.write(self._HEADER)
2834             now = time.time()
2835             for cookie in self:
2836                 if not ignore_discard and cookie.discard:
2837                     continue
2838                 if not ignore_expires and cookie.is_expired(now):
2839                     continue
2840                 if cookie.secure:
2841                     secure = 'TRUE'
2842                 else:
2843                     secure = 'FALSE'
2844                 if cookie.domain.startswith('.'):
2845                     initial_dot = 'TRUE'
2846                 else:
2847                     initial_dot = 'FALSE'
2848                 if cookie.expires is not None:
2849                     expires = compat_str(cookie.expires)
2850                 else:
2851                     expires = ''
2852                 if cookie.value is None:
2853                     # cookies.txt regards 'Set-Cookie: foo' as a cookie
2854                     # with no name, whereas http.cookiejar regards it as a
2855                     # cookie with no value.
2856                     name = ''
2857                     value = cookie.name
2858                 else:
2859                     name = cookie.name
2860                     value = cookie.value
2861                 f.write(
2862                     '\t'.join([cookie.domain, initial_dot, cookie.path,
2863                                secure, expires, name, value]) + '\n')
2864
2865     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
2866         """Load cookies from a file."""
2867         if filename is None:
2868             if self.filename is not None:
2869                 filename = self.filename
2870             else:
2871                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2872
2873         def prepare_line(line):
2874             if line.startswith(self._HTTPONLY_PREFIX):
2875                 line = line[len(self._HTTPONLY_PREFIX):]
2876             # comments and empty lines are fine
2877             if line.startswith('#') or not line.strip():
2878                 return line
2879             cookie_list = line.split('\t')
2880             if len(cookie_list) != self._ENTRY_LEN:
2881                 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2882             cookie = self._CookieFileEntry(*cookie_list)
2883             if cookie.expires_at and not cookie.expires_at.isdigit():
2884                 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2885             return line
2886
2887         cf = io.StringIO()
2888         with io.open(filename, encoding='utf-8') as f:
2889             for line in f:
2890                 try:
2891                     cf.write(prepare_line(line))
2892                 except compat_cookiejar.LoadError as e:
2893                     write_string(
2894                         'WARNING: skipping cookie file entry due to %s: %r\n'
2895                         % (e, line), sys.stderr)
2896                     continue
2897         cf.seek(0)
2898         self._really_load(cf, filename, ignore_discard, ignore_expires)
2899         # Session cookies are denoted by either `expires` field set to
2900         # an empty string or 0. MozillaCookieJar only recognizes the former
2901         # (see [1]). So we need force the latter to be recognized as session
2902         # cookies on our own.
2903         # Session cookies may be important for cookies-based authentication,
2904         # e.g. usually, when user does not check 'Remember me' check box while
2905         # logging in on a site, some important cookies are stored as session
2906         # cookies so that not recognizing them will result in failed login.
2907         # 1. https://bugs.python.org/issue17164
2908         for cookie in self:
2909             # Treat `expires=0` cookies as session cookies
2910             if cookie.expires == 0:
2911                 cookie.expires = None
2912                 cookie.discard = True
2913
2914
2915 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2916     def __init__(self, cookiejar=None):
2917         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2918
2919     def http_response(self, request, response):
2920         # Python 2 will choke on next HTTP request in row if there are non-ASCII
2921         # characters in Set-Cookie HTTP header of last response (see
2922         # https://github.com/ytdl-org/youtube-dl/issues/6769).
2923         # In order to at least prevent crashing we will percent encode Set-Cookie
2924         # header before HTTPCookieProcessor starts processing it.
2925         # if sys.version_info < (3, 0) and response.headers:
2926         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2927         #         set_cookie = response.headers.get(set_cookie_header)
2928         #         if set_cookie:
2929         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2930         #             if set_cookie != set_cookie_escaped:
2931         #                 del response.headers[set_cookie_header]
2932         #                 response.headers[set_cookie_header] = set_cookie_escaped
2933         return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2934
2935     https_request = compat_urllib_request.HTTPCookieProcessor.http_request
2936     https_response = http_response
2937
2938
2939 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
2940     """YoutubeDL redirect handler
2941
2942     The code is based on HTTPRedirectHandler implementation from CPython [1].
2943
2944     This redirect handler solves two issues:
2945      - ensures redirect URL is always unicode under python 2
2946      - introduces support for experimental HTTP response status code
2947        308 Permanent Redirect [2] used by some sites [3]
2948
2949     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2950     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2951     3. https://github.com/ytdl-org/youtube-dl/issues/28768
2952     """
2953
2954     http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
2955
2956     def redirect_request(self, req, fp, code, msg, headers, newurl):
2957         """Return a Request or None in response to a redirect.
2958
2959         This is called by the http_error_30x methods when a
2960         redirection response is received.  If a redirection should
2961         take place, return a new Request to allow http_error_30x to
2962         perform the redirect.  Otherwise, raise HTTPError if no-one
2963         else should try to handle this url.  Return None if you can't
2964         but another Handler might.
2965         """
2966         m = req.get_method()
2967         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
2968                  or code in (301, 302, 303) and m == "POST")):
2969             raise compat_HTTPError(req.full_url, code, msg, headers, fp)
2970         # Strictly (according to RFC 2616), 301 or 302 in response to
2971         # a POST MUST NOT cause a redirection without confirmation
2972         # from the user (of urllib.request, in this case).  In practice,
2973         # essentially all clients do redirect in this case, so we do
2974         # the same.
2975
2976         # On python 2 urlh.geturl() may sometimes return redirect URL
2977         # as byte string instead of unicode. This workaround allows
2978         # to force it always return unicode.
2979         if sys.version_info[0] < 3:
2980             newurl = compat_str(newurl)
2981
2982         # Be conciliant with URIs containing a space.  This is mainly
2983         # redundant with the more complete encoding done in http_error_302(),
2984         # but it is kept for compatibility with other callers.
2985         newurl = newurl.replace(' ', '%20')
2986
2987         CONTENT_HEADERS = ("content-length", "content-type")
2988         # NB: don't use dict comprehension for python 2.6 compatibility
2989         newheaders = dict((k, v) for k, v in req.headers.items()
2990                           if k.lower() not in CONTENT_HEADERS)
2991         return compat_urllib_request.Request(
2992             newurl, headers=newheaders, origin_req_host=req.origin_req_host,
2993             unverifiable=True)
2994
2995
2996 def extract_timezone(date_str):
2997     m = re.search(
2998         r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
2999         date_str)
3000     if not m:
3001         timezone = datetime.timedelta()
3002     else:
3003         date_str = date_str[:-len(m.group('tz'))]
3004         if not m.group('sign'):
3005             timezone = datetime.timedelta()
3006         else:
3007             sign = 1 if m.group('sign') == '+' else -1
3008             timezone = datetime.timedelta(
3009                 hours=sign * int(m.group('hours')),
3010                 minutes=sign * int(m.group('minutes')))
3011     return timezone, date_str
3012
3013
3014 def parse_iso8601(date_str, delimiter='T', timezone=None):
3015     """ Return a UNIX timestamp from the given date """
3016
3017     if date_str is None:
3018         return None
3019
3020     date_str = re.sub(r'\.[0-9]+', '', date_str)
3021
3022     if timezone is None:
3023         timezone, date_str = extract_timezone(date_str)
3024
3025     try:
3026         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3027         dt = datetime.datetime.strptime(date_str, date_format) - timezone
3028         return calendar.timegm(dt.timetuple())
3029     except ValueError:
3030         pass
3031
3032
3033 def date_formats(day_first=True):
3034     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3035
3036
3037 def unified_strdate(date_str, day_first=True):
3038     """Return a string with the date in the format YYYYMMDD"""
3039
3040     if date_str is None:
3041         return None
3042     upload_date = None
3043     # Replace commas
3044     date_str = date_str.replace(',', ' ')
3045     # Remove AM/PM + timezone
3046     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3047     _, date_str = extract_timezone(date_str)
3048
3049     for expression in date_formats(day_first):
3050         try:
3051             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
3052         except ValueError:
3053             pass
3054     if upload_date is None:
3055         timetuple = email.utils.parsedate_tz(date_str)
3056         if timetuple:
3057             try:
3058                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3059             except ValueError:
3060                 pass
3061     if upload_date is not None:
3062         return compat_str(upload_date)
3063
3064
3065 def unified_timestamp(date_str, day_first=True):
3066     if date_str is None:
3067         return None
3068
3069     date_str = re.sub(r'[,|]', '', date_str)
3070
3071     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
3072     timezone, date_str = extract_timezone(date_str)
3073
3074     # Remove AM/PM + timezone
3075     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3076
3077     # Remove unrecognized timezones from ISO 8601 alike timestamps
3078     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3079     if m:
3080         date_str = date_str[:-len(m.group('tz'))]
3081
3082     # Python only supports microseconds, so remove nanoseconds
3083     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3084     if m:
3085         date_str = m.group(1)
3086
3087     for expression in date_formats(day_first):
3088         try:
3089             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
3090             return calendar.timegm(dt.timetuple())
3091         except ValueError:
3092             pass
3093     timetuple = email.utils.parsedate_tz(date_str)
3094     if timetuple:
3095         return calendar.timegm(timetuple) + pm_delta * 3600
3096
3097
3098 def determine_ext(url, default_ext='unknown_video'):
3099     if url is None or '.' not in url:
3100         return default_ext
3101     guess = url.partition('?')[0].rpartition('.')[2]
3102     if re.match(r'^[A-Za-z0-9]+$', guess):
3103         return guess
3104     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3105     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
3106         return guess.rstrip('/')
3107     else:
3108         return default_ext
3109
3110
3111 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3112     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
3113
3114
3115 def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
3116     """
3117     Return a datetime object from a string in the format YYYYMMDD or
3118     (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3119
3120     format: string date format used to return datetime object from
3121     precision: round the time portion of a datetime object.
3122                 auto|microsecond|second|minute|hour|day.
3123                 auto: round to the unit provided in date_str (if applicable).
3124     """
3125     auto_precision = False
3126     if precision == 'auto':
3127         auto_precision = True
3128         precision = 'microsecond'
3129     today = datetime_round(datetime.datetime.now(), precision)
3130     if date_str in ('now', 'today'):
3131         return today
3132     if date_str == 'yesterday':
3133         return today - datetime.timedelta(days=1)
3134     match = re.match(
3135         r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3136         date_str)
3137     if match is not None:
3138         start_time = datetime_from_str(match.group('start'), precision, format)
3139         time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
3140         unit = match.group('unit')
3141         if unit == 'month' or unit == 'year':
3142             new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
3143             unit = 'day'
3144         else:
3145             if unit == 'week':
3146                 unit = 'day'
3147                 time *= 7
3148             delta = datetime.timedelta(**{unit + 's': time})
3149             new_date = start_time + delta
3150         if auto_precision:
3151             return datetime_round(new_date, unit)
3152         return new_date
3153
3154     return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3155
3156
3157 def date_from_str(date_str, format='%Y%m%d'):
3158     """
3159     Return a datetime object from a string in the format YYYYMMDD or
3160     (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3161
3162     format: string date format used to return datetime object from
3163     """
3164     return datetime_from_str(date_str, precision='microsecond', format=format).date()
3165
3166
3167 def datetime_add_months(dt, months):
3168     """Increment/Decrement a datetime object by months."""
3169     month = dt.month + months - 1
3170     year = dt.year + month // 12
3171     month = month % 12 + 1
3172     day = min(dt.day, calendar.monthrange(year, month)[1])
3173     return dt.replace(year, month, day)
3174
3175
3176 def datetime_round(dt, precision='day'):
3177     """
3178     Round a datetime object's time to a specific precision
3179     """
3180     if precision == 'microsecond':
3181         return dt
3182
3183     unit_seconds = {
3184         'day': 86400,
3185         'hour': 3600,
3186         'minute': 60,
3187         'second': 1,
3188     }
3189     roundto = lambda x, n: ((x + n / 2) // n) * n
3190     timestamp = calendar.timegm(dt.timetuple())
3191     return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
3192
3193
3194 def hyphenate_date(date_str):
3195     """
3196     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3197     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3198     if match is not None:
3199         return '-'.join(match.groups())
3200     else:
3201         return date_str
3202
3203
3204 class DateRange(object):
3205     """Represents a time interval between two dates"""
3206
3207     def __init__(self, start=None, end=None):
3208         """start and end must be strings in the format accepted by date"""
3209         if start is not None:
3210             self.start = date_from_str(start)
3211         else:
3212             self.start = datetime.datetime.min.date()
3213         if end is not None:
3214             self.end = date_from_str(end)
3215         else:
3216             self.end = datetime.datetime.max.date()
3217         if self.start > self.end:
3218             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
3219
3220     @classmethod
3221     def day(cls, day):
3222         """Returns a range that only contains the given day"""
3223         return cls(day, day)
3224
3225     def __contains__(self, date):
3226         """Check if the date is in the range"""
3227         if not isinstance(date, datetime.date):
3228             date = date_from_str(date)
3229         return self.start <= date <= self.end
3230
3231     def __str__(self):
3232         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3233
3234
3235 def platform_name():
3236     """ Returns the platform name as a compat_str """
3237     res = platform.platform()
3238     if isinstance(res, bytes):
3239         res = res.decode(preferredencoding())
3240
3241     assert isinstance(res, compat_str)
3242     return res
3243
3244
3245 def _windows_write_string(s, out):
3246     """ Returns True if the string was written using special methods,
3247     False if it has yet to be written out."""
3248     # Adapted from http://stackoverflow.com/a/3259271/35070
3249
3250     import ctypes
3251     import ctypes.wintypes
3252
3253     WIN_OUTPUT_IDS = {
3254         1: -11,
3255         2: -12,
3256     }
3257
3258     try:
3259         fileno = out.fileno()
3260     except AttributeError:
3261         # If the output stream doesn't have a fileno, it's virtual
3262         return False
3263     except io.UnsupportedOperation:
3264         # Some strange Windows pseudo files?
3265         return False
3266     if fileno not in WIN_OUTPUT_IDS:
3267         return False
3268
3269     GetStdHandle = compat_ctypes_WINFUNCTYPE(
3270         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
3271         ('GetStdHandle', ctypes.windll.kernel32))
3272     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3273
3274     WriteConsoleW = compat_ctypes_WINFUNCTYPE(
3275         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3276         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
3277         ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
3278     written = ctypes.wintypes.DWORD(0)
3279
3280     GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
3281     FILE_TYPE_CHAR = 0x0002
3282     FILE_TYPE_REMOTE = 0x8000
3283     GetConsoleMode = compat_ctypes_WINFUNCTYPE(
3284         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3285         ctypes.POINTER(ctypes.wintypes.DWORD))(
3286         ('GetConsoleMode', ctypes.windll.kernel32))
3287     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3288
3289     def not_a_console(handle):
3290         if handle == INVALID_HANDLE_VALUE or handle is None:
3291             return True
3292         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3293                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
3294
3295     if not_a_console(h):
3296         return False
3297
3298     def next_nonbmp_pos(s):
3299         try:
3300             return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3301         except StopIteration:
3302             return len(s)
3303
3304     while s:
3305         count = min(next_nonbmp_pos(s), 1024)
3306
3307         ret = WriteConsoleW(
3308             h, s, count if count else 2, ctypes.byref(written), None)
3309         if ret == 0:
3310             raise OSError('Failed to write string')
3311         if not count:  # We just wrote a non-BMP character
3312             assert written.value == 2
3313             s = s[1:]
3314         else:
3315             assert written.value > 0
3316             s = s[written.value:]
3317     return True
3318
3319
3320 def write_string(s, out=None, encoding=None):
3321     if out is None:
3322         out = sys.stderr
3323     assert type(s) == compat_str
3324
3325     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3326         if _windows_write_string(s, out):
3327             return
3328
3329     if ('b' in getattr(out, 'mode', '')
3330             or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
3331         byt = s.encode(encoding or preferredencoding(), 'ignore')
3332         out.write(byt)
3333     elif hasattr(out, 'buffer'):
3334         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3335         byt = s.encode(enc, 'ignore')
3336         out.buffer.write(byt)
3337     else:
3338         out.write(s)
3339     out.flush()
3340
3341
3342 def bytes_to_intlist(bs):
3343     if not bs:
3344         return []
3345     if isinstance(bs[0], int):  # Python 3
3346         return list(bs)
3347     else:
3348         return [ord(c) for c in bs]
3349
3350
3351 def intlist_to_bytes(xs):
3352     if not xs:
3353         return b''
3354     return compat_struct_pack('%dB' % len(xs), *xs)
3355
3356
3357 # Cross-platform file locking
3358 if sys.platform == 'win32':
3359     import ctypes.wintypes
3360     import msvcrt
3361
3362     class OVERLAPPED(ctypes.Structure):
3363         _fields_ = [
3364             ('Internal', ctypes.wintypes.LPVOID),
3365             ('InternalHigh', ctypes.wintypes.LPVOID),
3366             ('Offset', ctypes.wintypes.DWORD),
3367             ('OffsetHigh', ctypes.wintypes.DWORD),
3368             ('hEvent', ctypes.wintypes.HANDLE),
3369         ]
3370
3371     kernel32 = ctypes.windll.kernel32
3372     LockFileEx = kernel32.LockFileEx
3373     LockFileEx.argtypes = [
3374         ctypes.wintypes.HANDLE,     # hFile
3375         ctypes.wintypes.DWORD,      # dwFlags
3376         ctypes.wintypes.DWORD,      # dwReserved
3377         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3378         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3379         ctypes.POINTER(OVERLAPPED)  # Overlapped
3380     ]
3381     LockFileEx.restype = ctypes.wintypes.BOOL
3382     UnlockFileEx = kernel32.UnlockFileEx
3383     UnlockFileEx.argtypes = [
3384         ctypes.wintypes.HANDLE,     # hFile
3385         ctypes.wintypes.DWORD,      # dwReserved
3386         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
3387         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
3388         ctypes.POINTER(OVERLAPPED)  # Overlapped
3389     ]
3390     UnlockFileEx.restype = ctypes.wintypes.BOOL
3391     whole_low = 0xffffffff
3392     whole_high = 0x7fffffff
3393
3394     def _lock_file(f, exclusive):
3395         overlapped = OVERLAPPED()
3396         overlapped.Offset = 0
3397         overlapped.OffsetHigh = 0
3398         overlapped.hEvent = 0
3399         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3400         handle = msvcrt.get_osfhandle(f.fileno())
3401         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3402                           whole_low, whole_high, f._lock_file_overlapped_p):
3403             raise OSError('Locking file failed: %r' % ctypes.FormatError())
3404
3405     def _unlock_file(f):
3406         assert f._lock_file_overlapped_p
3407         handle = msvcrt.get_osfhandle(f.fileno())
3408         if not UnlockFileEx(handle, 0,
3409                             whole_low, whole_high, f._lock_file_overlapped_p):
3410             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3411
3412 else:
3413     # Some platforms, such as Jython, is missing fcntl
3414     try:
3415         import fcntl
3416
3417         def _lock_file(f, exclusive):
3418             fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
3419
3420         def _unlock_file(f):
3421             fcntl.flock(f, fcntl.LOCK_UN)
3422     except ImportError:
3423         UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3424
3425         def _lock_file(f, exclusive):
3426             raise IOError(UNSUPPORTED_MSG)
3427
3428         def _unlock_file(f):
3429             raise IOError(UNSUPPORTED_MSG)
3430
3431
3432 class locked_file(object):
3433     def __init__(self, filename, mode, encoding=None):
3434         assert mode in ['r', 'a', 'w']
3435         self.f = io.open(filename, mode, encoding=encoding)
3436         self.mode = mode
3437
3438     def __enter__(self):
3439         exclusive = self.mode != 'r'
3440         try:
3441             _lock_file(self.f, exclusive)
3442         except IOError:
3443             self.f.close()
3444             raise
3445         return self
3446
3447     def __exit__(self, etype, value, traceback):
3448         try:
3449             _unlock_file(self.f)
3450         finally:
3451             self.f.close()
3452
3453     def __iter__(self):
3454         return iter(self.f)
3455
3456     def write(self, *args):
3457         return self.f.write(*args)
3458
3459     def read(self, *args):
3460         return self.f.read(*args)
3461
3462
3463 def get_filesystem_encoding():
3464     encoding = sys.getfilesystemencoding()
3465     return encoding if encoding is not None else 'utf-8'
3466
3467
3468 def shell_quote(args):
3469     quoted_args = []
3470     encoding = get_filesystem_encoding()
3471     for a in args:
3472         if isinstance(a, bytes):
3473             # We may get a filename encoded with 'encodeFilename'
3474             a = a.decode(encoding)
3475         quoted_args.append(compat_shlex_quote(a))
3476     return ' '.join(quoted_args)
3477
3478
3479 def smuggle_url(url, data):
3480     """ Pass additional data in a URL for internal use. """
3481
3482     url, idata = unsmuggle_url(url, {})
3483     data.update(idata)
3484     sdata = compat_urllib_parse_urlencode(
3485         {'__youtubedl_smuggle': json.dumps(data)})
3486     return url + '#' + sdata
3487
3488
3489 def unsmuggle_url(smug_url, default=None):
3490     if '#__youtubedl_smuggle' not in smug_url:
3491         return smug_url, default
3492     url, _, sdata = smug_url.rpartition('#')
3493     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
3494     data = json.loads(jsond)
3495     return url, data
3496
3497
3498 def format_bytes(bytes):
3499     if bytes is None:
3500         return 'N/A'
3501     if type(bytes) is str:
3502         bytes = float(bytes)
3503     if bytes == 0.0:
3504         exponent = 0
3505     else:
3506         exponent = int(math.log(bytes, 1024.0))
3507     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
3508     converted = float(bytes) / float(1024 ** exponent)
3509     return '%.2f%s' % (converted, suffix)
3510
3511
3512 def lookup_unit_table(unit_table, s):
3513     units_re = '|'.join(re.escape(u) for u in unit_table)
3514     m = re.match(
3515         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
3516     if not m:
3517         return None
3518     num_str = m.group('num').replace(',', '.')
3519     mult = unit_table[m.group('unit')]
3520     return int(float(num_str) * mult)
3521
3522
3523 def parse_filesize(s):
3524     if s is None:
3525         return None
3526
3527     # The lower-case forms are of course incorrect and unofficial,
3528     # but we support those too
3529     _UNIT_TABLE = {
3530         'B': 1,
3531         'b': 1,
3532         'bytes': 1,
3533         'KiB': 1024,
3534         'KB': 1000,
3535         'kB': 1024,
3536         'Kb': 1000,
3537         'kb': 1000,
3538         'kilobytes': 1000,
3539         'kibibytes': 1024,
3540         'MiB': 1024 ** 2,
3541         'MB': 1000 ** 2,
3542         'mB': 1024 ** 2,
3543         'Mb': 1000 ** 2,
3544         'mb': 1000 ** 2,
3545         'megabytes': 1000 ** 2,
3546         'mebibytes': 1024 ** 2,
3547         'GiB': 1024 ** 3,
3548         'GB': 1000 ** 3,
3549         'gB': 1024 ** 3,
3550         'Gb': 1000 ** 3,
3551         'gb': 1000 ** 3,
3552         'gigabytes': 1000 ** 3,
3553         'gibibytes': 1024 ** 3,
3554         'TiB': 1024 ** 4,
3555         'TB': 1000 ** 4,
3556         'tB': 1024 ** 4,
3557         'Tb': 1000 ** 4,
3558         'tb': 1000 ** 4,
3559         'terabytes': 1000 ** 4,
3560         'tebibytes': 1024 ** 4,
3561         'PiB': 1024 ** 5,
3562         'PB': 1000 ** 5,
3563         'pB': 1024 ** 5,
3564         'Pb': 1000 ** 5,
3565         'pb': 1000 ** 5,
3566         'petabytes': 1000 ** 5,
3567         'pebibytes': 1024 ** 5,
3568         'EiB': 1024 ** 6,
3569         'EB': 1000 ** 6,
3570         'eB': 1024 ** 6,
3571         'Eb': 1000 ** 6,
3572         'eb': 1000 ** 6,
3573         'exabytes': 1000 ** 6,
3574         'exbibytes': 1024 ** 6,
3575         'ZiB': 1024 ** 7,
3576         'ZB': 1000 ** 7,
3577         'zB': 1024 ** 7,
3578         'Zb': 1000 ** 7,
3579         'zb': 1000 ** 7,
3580         'zettabytes': 1000 ** 7,
3581         'zebibytes': 1024 ** 7,
3582         'YiB': 1024 ** 8,
3583         'YB': 1000 ** 8,
3584         'yB': 1024 ** 8,
3585         'Yb': 1000 ** 8,
3586         'yb': 1000 ** 8,
3587         'yottabytes': 1000 ** 8,
3588         'yobibytes': 1024 ** 8,
3589     }
3590
3591     return lookup_unit_table(_UNIT_TABLE, s)
3592
3593
3594 def parse_count(s):
3595     if s is None:
3596         return None
3597
3598     s = s.strip()
3599
3600     if re.match(r'^[\d,.]+$', s):
3601         return str_to_int(s)
3602
3603     _UNIT_TABLE = {
3604         'k': 1000,
3605         'K': 1000,
3606         'm': 1000 ** 2,
3607         'M': 1000 ** 2,
3608         'kk': 1000 ** 2,
3609         'KK': 1000 ** 2,
3610     }
3611
3612     return lookup_unit_table(_UNIT_TABLE, s)
3613
3614
3615 def parse_resolution(s):
3616     if s is None:
3617         return {}
3618
3619     mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3620     if mobj:
3621         return {
3622             'width': int(mobj.group('w')),
3623             'height': int(mobj.group('h')),
3624         }
3625
3626     mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3627     if mobj:
3628         return {'height': int(mobj.group(1))}
3629
3630     mobj = re.search(r'\b([48])[kK]\b', s)
3631     if mobj:
3632         return {'height': int(mobj.group(1)) * 540}
3633
3634     return {}
3635
3636
3637 def parse_bitrate(s):
3638     if not isinstance(s, compat_str):
3639         return
3640     mobj = re.search(r'\b(\d+)\s*kbps', s)
3641     if mobj:
3642         return int(mobj.group(1))
3643
3644
3645 def month_by_name(name, lang='en'):
3646     """ Return the number of a month by (locale-independently) English name """
3647
3648     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
3649
3650     try:
3651         return month_names.index(name) + 1
3652     except ValueError:
3653         return None
3654
3655
3656 def month_by_abbreviation(abbrev):
3657     """ Return the number of a month by (locale-independently) English
3658         abbreviations """
3659
3660     try:
3661         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
3662     except ValueError:
3663         return None
3664
3665
3666 def fix_xml_ampersands(xml_str):
3667     """Replace all the '&' by '&amp;' in XML"""
3668     return re.sub(
3669         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3670         '&amp;',
3671         xml_str)
3672
3673
3674 def setproctitle(title):
3675     assert isinstance(title, compat_str)
3676
3677     # ctypes in Jython is not complete
3678     # http://bugs.jython.org/issue2148
3679     if sys.platform.startswith('java'):
3680         return
3681
3682     try:
3683         libc = ctypes.cdll.LoadLibrary('libc.so.6')
3684     except OSError:
3685         return
3686     except TypeError:
3687         # LoadLibrary in Windows Python 2.7.13 only expects
3688         # a bytestring, but since unicode_literals turns
3689         # every string into a unicode string, it fails.
3690         return
3691     title_bytes = title.encode('utf-8')
3692     buf = ctypes.create_string_buffer(len(title_bytes))
3693     buf.value = title_bytes
3694     try:
3695         libc.prctl(15, buf, 0, 0, 0)
3696     except AttributeError:
3697         return  # Strange libc, just skip this
3698
3699
3700 def remove_start(s, start):
3701     return s[len(start):] if s is not None and s.startswith(start) else s
3702
3703
3704 def remove_end(s, end):
3705     return s[:-len(end)] if s is not None and s.endswith(end) else s
3706
3707
3708 def remove_quotes(s):
3709     if s is None or len(s) < 2:
3710         return s
3711     for quote in ('"', "'", ):
3712         if s[0] == quote and s[-1] == quote:
3713             return s[1:-1]
3714     return s
3715
3716
3717 def get_domain(url):
3718     domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3719     return domain.group('domain') if domain else None
3720
3721
3722 def url_basename(url):
3723     path = compat_urlparse.urlparse(url).path
3724     return path.strip('/').split('/')[-1]
3725
3726
3727 def base_url(url):
3728     return re.match(r'https?://[^?#&]+/', url).group()
3729
3730
3731 def urljoin(base, path):
3732     if isinstance(path, bytes):
3733         path = path.decode('utf-8')
3734     if not isinstance(path, compat_str) or not path:
3735         return None
3736     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
3737         return path
3738     if isinstance(base, bytes):
3739         base = base.decode('utf-8')
3740     if not isinstance(base, compat_str) or not re.match(
3741             r'^(?:https?:)?//', base):
3742         return None
3743     return compat_urlparse.urljoin(base, path)
3744
3745
3746 class HEADRequest(compat_urllib_request.Request):
3747     def get_method(self):
3748         return 'HEAD'
3749
3750
3751 class PUTRequest(compat_urllib_request.Request):
3752     def get_method(self):
3753         return 'PUT'
3754
3755
3756 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
3757     if get_attr:
3758         if v is not None:
3759             v = getattr(v, get_attr, None)
3760     if v == '':
3761         v = None
3762     if v is None:
3763         return default
3764     try:
3765         return int(v) * invscale // scale
3766     except (ValueError, TypeError):
3767         return default
3768
3769
3770 def str_or_none(v, default=None):
3771     return default if v is None else compat_str(v)
3772
3773
3774 def str_to_int(int_str):
3775     """ A more relaxed version of int_or_none """
3776     if isinstance(int_str, compat_integer_types):
3777         return int_str
3778     elif isinstance(int_str, compat_str):
3779         int_str = re.sub(r'[,\.\+]', '', int_str)
3780         return int_or_none(int_str)
3781
3782
3783 def float_or_none(v, scale=1, invscale=1, default=None):
3784     if v is None:
3785         return default
3786     try:
3787         return float(v) * invscale / scale
3788     except (ValueError, TypeError):
3789         return default
3790
3791
3792 def bool_or_none(v, default=None):
3793     return v if isinstance(v, bool) else default
3794
3795
3796 def strip_or_none(v, default=None):
3797     return v.strip() if isinstance(v, compat_str) else default
3798
3799
3800 def url_or_none(url):
3801     if not url or not isinstance(url, compat_str):
3802         return None
3803     url = url.strip()
3804     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
3805
3806
3807 def strftime_or_none(timestamp, date_format, default=None):
3808     datetime_object = None
3809     try:
3810         if isinstance(timestamp, compat_numeric_types):  # unix timestamp
3811             datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3812         elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
3813             datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3814         return datetime_object.strftime(date_format)
3815     except (ValueError, TypeError, AttributeError):
3816         return default
3817
3818
3819 def parse_duration(s):
3820     if not isinstance(s, compat_basestring):
3821         return None
3822
3823     s = s.strip()
3824
3825     days, hours, mins, secs, ms = [None] * 5
3826     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
3827     if m:
3828         days, hours, mins, secs, ms = m.groups()
3829     else:
3830         m = re.match(
3831             r'''(?ix)(?:P?
3832                 (?:
3833                     [0-9]+\s*y(?:ears?)?\s*
3834                 )?
3835                 (?:
3836                     [0-9]+\s*m(?:onths?)?\s*
3837                 )?
3838                 (?:
3839                     [0-9]+\s*w(?:eeks?)?\s*
3840                 )?
3841                 (?:
3842                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3843                 )?
3844                 T)?
3845                 (?:
3846                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3847                 )?
3848                 (?:
3849                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3850                 )?
3851                 (?:
3852                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3853                 )?Z?$''', s)
3854         if m:
3855             days, hours, mins, secs, ms = m.groups()
3856         else:
3857             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
3858             if m:
3859                 hours, mins = m.groups()
3860             else:
3861                 return None
3862
3863     duration = 0
3864     if secs:
3865         duration += float(secs)
3866     if mins:
3867         duration += float(mins) * 60
3868     if hours:
3869         duration += float(hours) * 60 * 60
3870     if days:
3871         duration += float(days) * 24 * 60 * 60
3872     if ms:
3873         duration += float(ms)
3874     return duration
3875
3876
3877 def prepend_extension(filename, ext, expected_real_ext=None):
3878     name, real_ext = os.path.splitext(filename)
3879     return (
3880         '{0}.{1}{2}'.format(name, ext, real_ext)
3881         if not expected_real_ext or real_ext[1:] == expected_real_ext
3882         else '{0}.{1}'.format(filename, ext))
3883
3884
3885 def replace_extension(filename, ext, expected_real_ext=None):
3886     name, real_ext = os.path.splitext(filename)
3887     return '{0}.{1}'.format(
3888         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3889         ext)
3890
3891
3892 def check_executable(exe, args=[]):
3893     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3894     args can be a list of arguments for a short output (like -version) """
3895     try:
3896         process_communicate_or_kill(subprocess.Popen(
3897             [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
3898     except OSError:
3899         return False
3900     return exe
3901
3902
3903 def get_exe_version(exe, args=['--version'],
3904                     version_re=None, unrecognized='present'):
3905     """ Returns the version of the specified executable,
3906     or False if the executable is not present """
3907     try:
3908         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3909         # SIGTTOU if yt-dlp is run in the background.
3910         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3911         out, _ = process_communicate_or_kill(subprocess.Popen(
3912             [encodeArgument(exe)] + args,
3913             stdin=subprocess.PIPE,
3914             stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
3915     except OSError:
3916         return False
3917     if isinstance(out, bytes):  # Python 2.x
3918         out = out.decode('ascii', 'ignore')
3919     return detect_exe_version(out, version_re, unrecognized)
3920
3921
3922 def detect_exe_version(output, version_re=None, unrecognized='present'):
3923     assert isinstance(output, compat_str)
3924     if version_re is None:
3925         version_re = r'version\s+([-0-9._a-zA-Z]+)'
3926     m = re.search(version_re, output)
3927     if m:
3928         return m.group(1)
3929     else:
3930         return unrecognized
3931
3932
3933 class PagedList(object):
3934     def __len__(self):
3935         # This is only useful for tests
3936         return len(self.getslice())
3937
3938
3939 class OnDemandPagedList(PagedList):
3940     def __init__(self, pagefunc, pagesize, use_cache=True):
3941         self._pagefunc = pagefunc
3942         self._pagesize = pagesize
3943         self._use_cache = use_cache
3944         if use_cache:
3945             self._cache = {}
3946
3947     def getslice(self, start=0, end=None):
3948         res = []
3949         for pagenum in itertools.count(start // self._pagesize):
3950             firstid = pagenum * self._pagesize
3951             nextfirstid = pagenum * self._pagesize + self._pagesize
3952             if start >= nextfirstid:
3953                 continue
3954
3955             page_results = None
3956             if self._use_cache:
3957                 page_results = self._cache.get(pagenum)
3958             if page_results is None:
3959                 page_results = list(self._pagefunc(pagenum))
3960             if self._use_cache:
3961                 self._cache[pagenum] = page_results
3962
3963             startv = (
3964                 start % self._pagesize
3965                 if firstid <= start < nextfirstid
3966                 else 0)
3967
3968             endv = (
3969                 ((end - 1) % self._pagesize) + 1
3970                 if (end is not None and firstid <= end <= nextfirstid)
3971                 else None)
3972
3973             if startv != 0 or endv is not None:
3974                 page_results = page_results[startv:endv]
3975             res.extend(page_results)
3976
3977             # A little optimization - if current page is not "full", ie. does
3978             # not contain page_size videos then we can assume that this page
3979             # is the last one - there are no more ids on further pages -
3980             # i.e. no need to query again.
3981             if len(page_results) + startv < self._pagesize:
3982                 break
3983
3984             # If we got the whole page, but the next page is not interesting,
3985             # break out early as well
3986             if end == nextfirstid:
3987                 break
3988         return res
3989
3990
3991 class InAdvancePagedList(PagedList):
3992     def __init__(self, pagefunc, pagecount, pagesize):
3993         self._pagefunc = pagefunc
3994         self._pagecount = pagecount
3995         self._pagesize = pagesize
3996
3997     def getslice(self, start=0, end=None):
3998         res = []
3999         start_page = start // self._pagesize
4000         end_page = (
4001             self._pagecount if end is None else (end // self._pagesize + 1))
4002         skip_elems = start - start_page * self._pagesize
4003         only_more = None if end is None else end - start
4004         for pagenum in range(start_page, end_page):
4005             page = list(self._pagefunc(pagenum))
4006             if skip_elems:
4007                 page = page[skip_elems:]
4008                 skip_elems = None
4009             if only_more is not None:
4010                 if len(page) < only_more:
4011                     only_more -= len(page)
4012                 else:
4013                     page = page[:only_more]
4014                     res.extend(page)
4015                     break
4016             res.extend(page)
4017         return res
4018
4019
4020 def uppercase_escape(s):
4021     unicode_escape = codecs.getdecoder('unicode_escape')
4022     return re.sub(
4023         r'\\U[0-9a-fA-F]{8}',
4024         lambda m: unicode_escape(m.group(0))[0],
4025         s)
4026
4027
4028 def lowercase_escape(s):
4029     unicode_escape = codecs.getdecoder('unicode_escape')
4030     return re.sub(
4031         r'\\u[0-9a-fA-F]{4}',
4032         lambda m: unicode_escape(m.group(0))[0],
4033         s)
4034
4035
4036 def escape_rfc3986(s):
4037     """Escape non-ASCII characters as suggested by RFC 3986"""
4038     if sys.version_info < (3, 0) and isinstance(s, compat_str):
4039         s = s.encode('utf-8')
4040     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4041
4042
4043 def escape_url(url):
4044     """Escape URL as suggested by RFC 3986"""
4045     url_parsed = compat_urllib_parse_urlparse(url)
4046     return url_parsed._replace(
4047         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
4048         path=escape_rfc3986(url_parsed.path),
4049         params=escape_rfc3986(url_parsed.params),
4050         query=escape_rfc3986(url_parsed.query),
4051         fragment=escape_rfc3986(url_parsed.fragment)
4052     ).geturl()
4053
4054
4055 def read_batch_urls(batch_fd):
4056     def fixup(url):
4057         if not isinstance(url, compat_str):
4058             url = url.decode('utf-8', 'replace')
4059         BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4060         for bom in BOM_UTF8:
4061             if url.startswith(bom):
4062                 url = url[len(bom):]
4063         url = url.lstrip()
4064         if not url or url.startswith(('#', ';', ']')):
4065             return False
4066         # "#" cannot be stripped out since it is part of the URI
4067         # However, it can be safely stipped out if follwing a whitespace
4068         return re.split(r'\s#', url, 1)[0].rstrip()
4069
4070     with contextlib.closing(batch_fd) as fd:
4071         return [url for url in map(fixup, fd) if url]
4072
4073
4074 def urlencode_postdata(*args, **kargs):
4075     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4076
4077
4078 def update_url_query(url, query):
4079     if not query:
4080         return url
4081     parsed_url = compat_urlparse.urlparse(url)
4082     qs = compat_parse_qs(parsed_url.query)
4083     qs.update(query)
4084     return compat_urlparse.urlunparse(parsed_url._replace(
4085         query=compat_urllib_parse_urlencode(qs, True)))
4086
4087
4088 def update_Request(req, url=None, data=None, headers={}, query={}):
4089     req_headers = req.headers.copy()
4090     req_headers.update(headers)
4091     req_data = data or req.data
4092     req_url = update_url_query(url or req.get_full_url(), query)
4093     req_get_method = req.get_method()
4094     if req_get_method == 'HEAD':
4095         req_type = HEADRequest
4096     elif req_get_method == 'PUT':
4097         req_type = PUTRequest
4098     else:
4099         req_type = compat_urllib_request.Request
4100     new_req = req_type(
4101         req_url, data=req_data, headers=req_headers,
4102         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4103     if hasattr(req, 'timeout'):
4104         new_req.timeout = req.timeout
4105     return new_req
4106
4107
4108 def _multipart_encode_impl(data, boundary):
4109     content_type = 'multipart/form-data; boundary=%s' % boundary
4110
4111     out = b''
4112     for k, v in data.items():
4113         out += b'--' + boundary.encode('ascii') + b'\r\n'
4114         if isinstance(k, compat_str):
4115             k = k.encode('utf-8')
4116         if isinstance(v, compat_str):
4117             v = v.encode('utf-8')
4118         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4119         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
4120         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
4121         if boundary.encode('ascii') in content:
4122             raise ValueError('Boundary overlaps with data')
4123         out += content
4124
4125     out += b'--' + boundary.encode('ascii') + b'--\r\n'
4126
4127     return out, content_type
4128
4129
4130 def multipart_encode(data, boundary=None):
4131     '''
4132     Encode a dict to RFC 7578-compliant form-data
4133
4134     data:
4135         A dict where keys and values can be either Unicode or bytes-like
4136         objects.
4137     boundary:
4138         If specified a Unicode object, it's used as the boundary. Otherwise
4139         a random boundary is generated.
4140
4141     Reference: https://tools.ietf.org/html/rfc7578
4142     '''
4143     has_specified_boundary = boundary is not None
4144
4145     while True:
4146         if boundary is None:
4147             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4148
4149         try:
4150             out, content_type = _multipart_encode_impl(data, boundary)
4151             break
4152         except ValueError:
4153             if has_specified_boundary:
4154                 raise
4155             boundary = None
4156
4157     return out, content_type
4158
4159
4160 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
4161     if isinstance(key_or_keys, (list, tuple)):
4162         for key in key_or_keys:
4163             if key not in d or d[key] is None or skip_false_values and not d[key]:
4164                 continue
4165             return d[key]
4166         return default
4167     return d.get(key_or_keys, default)
4168
4169
4170 def try_get(src, getter, expected_type=None):
4171     if not isinstance(getter, (list, tuple)):
4172         getter = [getter]
4173     for get in getter:
4174         try:
4175             v = get(src)
4176         except (AttributeError, KeyError, TypeError, IndexError):
4177             pass
4178         else:
4179             if expected_type is None or isinstance(v, expected_type):
4180                 return v
4181
4182
4183 def merge_dicts(*dicts):
4184     merged = {}
4185     for a_dict in dicts:
4186         for k, v in a_dict.items():
4187             if v is None:
4188                 continue
4189             if (k not in merged
4190                     or (isinstance(v, compat_str) and v
4191                         and isinstance(merged[k], compat_str)
4192                         and not merged[k])):
4193                 merged[k] = v
4194     return merged
4195
4196
4197 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4198     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4199
4200
4201 US_RATINGS = {
4202     'G': 0,
4203     'PG': 10,
4204     'PG-13': 13,
4205     'R': 16,
4206     'NC': 18,
4207 }
4208
4209
4210 TV_PARENTAL_GUIDELINES = {
4211     'TV-Y': 0,
4212     'TV-Y7': 7,
4213     'TV-G': 0,
4214     'TV-PG': 0,
4215     'TV-14': 14,
4216     'TV-MA': 17,
4217 }
4218
4219
4220 def parse_age_limit(s):
4221     if type(s) == int:
4222         return s if 0 <= s <= 21 else None
4223     if not isinstance(s, compat_basestring):
4224         return None
4225     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
4226     if m:
4227         return int(m.group('age'))
4228     s = s.upper()
4229     if s in US_RATINGS:
4230         return US_RATINGS[s]
4231     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
4232     if m:
4233         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
4234     return None
4235
4236
4237 def strip_jsonp(code):
4238     return re.sub(
4239         r'''(?sx)^
4240             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4241             (?:\s*&&\s*(?P=func_name))?
4242             \s*\(\s*(?P<callback_data>.*)\);?
4243             \s*?(?://[^\n]*)*$''',
4244         r'\g<callback_data>', code)
4245
4246
4247 def js_to_json(code, vars={}):
4248     # vars is a dict of var, val pairs to substitute
4249     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4250     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4251     INTEGER_TABLE = (
4252         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4253         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4254     )
4255
4256     def fix_kv(m):
4257         v = m.group(0)
4258         if v in ('true', 'false', 'null'):
4259             return v
4260         elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
4261             return ""
4262
4263         if v[0] in ("'", '"'):
4264             v = re.sub(r'(?s)\\.|"', lambda m: {
4265                 '"': '\\"',
4266                 "\\'": "'",
4267                 '\\\n': '',
4268                 '\\x': '\\u00',
4269             }.get(m.group(0), m.group(0)), v[1:-1])
4270         else:
4271             for regex, base in INTEGER_TABLE:
4272                 im = re.match(regex, v)
4273                 if im:
4274                     i = int(im.group(1), base)
4275                     return '"%d":' % i if v.endswith(':') else '%d' % i
4276
4277             if v in vars:
4278                 return vars[v]
4279
4280         return '"%s"' % v
4281
4282     return re.sub(r'''(?sx)
4283         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4284         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4285         {comment}|,(?={skip}[\]}}])|
4286         (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4287         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4288         [0-9]+(?={skip}:)|
4289         !+
4290         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4291
4292
4293 def qualities(quality_ids):
4294     """ Get a numeric quality value out of a list of possible values """
4295     def q(qid):
4296         try:
4297             return quality_ids.index(qid)
4298         except ValueError:
4299             return -1
4300     return q
4301
4302
4303 DEFAULT_OUTTMPL = {
4304     'default': '%(title)s [%(id)s].%(ext)s',
4305     'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
4306 }
4307 OUTTMPL_TYPES = {
4308     'chapter': None,
4309     'subtitle': None,
4310     'thumbnail': None,
4311     'description': 'description',
4312     'annotation': 'annotations.xml',
4313     'infojson': 'info.json',
4314     'pl_description': 'description',
4315     'pl_infojson': 'info.json',
4316 }
4317
4318 # As of [1] format syntax is:
4319 #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4320 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4321 FORMAT_RE = r'''(?x)
4322     (?<!%)
4323     %
4324     \({0}\)  # mapping key
4325     (?:[#0\-+ ]+)?  # conversion flags (optional)
4326     (?:\d+)?  # minimum field width (optional)
4327     (?:\.\d+)?  # precision (optional)
4328     [hlL]?  # length modifier (optional)
4329     (?P<type>[diouxXeEfFgGcrs%])  # conversion type
4330 '''
4331
4332
4333 def limit_length(s, length):
4334     """ Add ellipses to overly long strings """
4335     if s is None:
4336         return None
4337     ELLIPSES = '...'
4338     if len(s) > length:
4339         return s[:length - len(ELLIPSES)] + ELLIPSES
4340     return s
4341
4342
4343 def version_tuple(v):
4344     return tuple(int(e) for e in re.split(r'[-.]', v))
4345
4346
4347 def is_outdated_version(version, limit, assume_new=True):
4348     if not version:
4349         return not assume_new
4350     try:
4351         return version_tuple(version) < version_tuple(limit)
4352     except ValueError:
4353         return not assume_new
4354
4355
4356 def ytdl_is_updateable():
4357     """ Returns if yt-dlp can be updated with -U """
4358     return False
4359
4360     from zipimport import zipimporter
4361
4362     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4363
4364
4365 def args_to_str(args):
4366     # Get a short string representation for a subprocess command
4367     return ' '.join(compat_shlex_quote(a) for a in args)
4368
4369
4370 def error_to_compat_str(err):
4371     err_str = str(err)
4372     # On python 2 error byte string must be decoded with proper
4373     # encoding rather than ascii
4374     if sys.version_info[0] < 3:
4375         err_str = err_str.decode(preferredencoding())
4376     return err_str
4377
4378
4379 def mimetype2ext(mt):
4380     if mt is None:
4381         return None
4382
4383     ext = {
4384         'audio/mp4': 'm4a',
4385         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4386         # it's the most popular one
4387         'audio/mpeg': 'mp3',
4388         'audio/x-wav': 'wav',
4389     }.get(mt)
4390     if ext is not None:
4391         return ext
4392
4393     _, _, res = mt.rpartition('/')
4394     res = res.split(';')[0].strip().lower()
4395
4396     return {
4397         '3gpp': '3gp',
4398         'smptett+xml': 'tt',
4399         'ttaf+xml': 'dfxp',
4400         'ttml+xml': 'ttml',
4401         'x-flv': 'flv',
4402         'x-mp4-fragmented': 'mp4',
4403         'x-ms-sami': 'sami',
4404         'x-ms-wmv': 'wmv',
4405         'mpegurl': 'm3u8',
4406         'x-mpegurl': 'm3u8',
4407         'vnd.apple.mpegurl': 'm3u8',
4408         'dash+xml': 'mpd',
4409         'f4m+xml': 'f4m',
4410         'hds+xml': 'f4m',
4411         'vnd.ms-sstr+xml': 'ism',
4412         'quicktime': 'mov',
4413         'mp2t': 'ts',
4414         'x-wav': 'wav',
4415     }.get(res, res)
4416
4417
4418 def parse_codecs(codecs_str):
4419     # http://tools.ietf.org/html/rfc6381
4420     if not codecs_str:
4421         return {}
4422     split_codecs = list(filter(None, map(
4423         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4424     vcodec, acodec = None, None
4425     for full_codec in split_codecs:
4426         codec = full_codec.split('.')[0]
4427         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4428             if not vcodec:
4429                 vcodec = full_codec
4430         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4431             if not acodec:
4432                 acodec = full_codec
4433         else:
4434             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4435     if not vcodec and not acodec:
4436         if len(split_codecs) == 2:
4437             return {
4438                 'vcodec': split_codecs[0],
4439                 'acodec': split_codecs[1],
4440             }
4441     else:
4442         return {
4443             'vcodec': vcodec or 'none',
4444             'acodec': acodec or 'none',
4445         }
4446     return {}
4447
4448
4449 def urlhandle_detect_ext(url_handle):
4450     getheader = url_handle.headers.get
4451
4452     cd = getheader('Content-Disposition')
4453     if cd:
4454         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4455         if m:
4456             e = determine_ext(m.group('filename'), default_ext=None)
4457             if e:
4458                 return e
4459
4460     return mimetype2ext(getheader('Content-Type'))
4461
4462
4463 def encode_data_uri(data, mime_type):
4464     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4465
4466
4467 def age_restricted(content_limit, age_limit):
4468     """ Returns True iff the content should be blocked """
4469
4470     if age_limit is None:  # No limit set
4471         return False
4472     if content_limit is None:
4473         return False  # Content available for everyone
4474     return age_limit < content_limit
4475
4476
4477 def is_html(first_bytes):
4478     """ Detect whether a file contains HTML by examining its first bytes. """
4479
4480     BOMS = [
4481         (b'\xef\xbb\xbf', 'utf-8'),
4482         (b'\x00\x00\xfe\xff', 'utf-32-be'),
4483         (b'\xff\xfe\x00\x00', 'utf-32-le'),
4484         (b'\xff\xfe', 'utf-16-le'),
4485         (b'\xfe\xff', 'utf-16-be'),
4486     ]
4487     for bom, enc in BOMS:
4488         if first_bytes.startswith(bom):
4489             s = first_bytes[len(bom):].decode(enc, 'replace')
4490             break
4491     else:
4492         s = first_bytes.decode('utf-8', 'replace')
4493
4494     return re.match(r'^\s*<', s)
4495
4496
4497 def determine_protocol(info_dict):
4498     protocol = info_dict.get('protocol')
4499     if protocol is not None:
4500         return protocol
4501
4502     url = info_dict['url']
4503     if url.startswith('rtmp'):
4504         return 'rtmp'
4505     elif url.startswith('mms'):
4506         return 'mms'
4507     elif url.startswith('rtsp'):
4508         return 'rtsp'
4509
4510     ext = determine_ext(url)
4511     if ext == 'm3u8':
4512         return 'm3u8'
4513     elif ext == 'f4m':
4514         return 'f4m'
4515
4516     return compat_urllib_parse_urlparse(url).scheme
4517
4518
4519 def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
4520     """ Render a list of rows, each as a list of values """
4521
4522     def get_max_lens(table):
4523         return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4524
4525     def filter_using_list(row, filterArray):
4526         return [col for (take, col) in zip(filterArray, row) if take]
4527
4528     if hideEmpty:
4529         max_lens = get_max_lens(data)
4530         header_row = filter_using_list(header_row, max_lens)
4531         data = [filter_using_list(row, max_lens) for row in data]
4532
4533     table = [header_row] + data
4534     max_lens = get_max_lens(table)
4535     if delim:
4536         table = [header_row] + [['-' * ml for ml in max_lens]] + data
4537     format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
4538     return '\n'.join(format_str % tuple(row) for row in table)
4539
4540
4541 def _match_one(filter_part, dct):
4542     COMPARISON_OPERATORS = {
4543         '<': operator.lt,
4544         '<=': operator.le,
4545         '>': operator.gt,
4546         '>=': operator.ge,
4547         '=': operator.eq,
4548         '!=': operator.ne,
4549     }
4550     operator_rex = re.compile(r'''(?x)\s*
4551         (?P<key>[a-z_]+)
4552         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4553         (?:
4554             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4555             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4556             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4557         )
4558         \s*$
4559         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4560     m = operator_rex.search(filter_part)
4561     if m:
4562         op = COMPARISON_OPERATORS[m.group('op')]
4563         actual_value = dct.get(m.group('key'))
4564         if (m.group('quotedstrval') is not None
4565             or m.group('strval') is not None
4566             # If the original field is a string and matching comparisonvalue is
4567             # a number we should respect the origin of the original field
4568             # and process comparison value as a string (see
4569             # https://github.com/ytdl-org/youtube-dl/issues/11082).
4570             or actual_value is not None and m.group('intval') is not None
4571                 and isinstance(actual_value, compat_str)):
4572             if m.group('op') not in ('=', '!='):
4573                 raise ValueError(
4574                     'Operator %s does not support string values!' % m.group('op'))
4575             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4576             quote = m.group('quote')
4577             if quote is not None:
4578                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4579         else:
4580             try:
4581                 comparison_value = int(m.group('intval'))
4582             except ValueError:
4583                 comparison_value = parse_filesize(m.group('intval'))
4584                 if comparison_value is None:
4585                     comparison_value = parse_filesize(m.group('intval') + 'B')
4586                 if comparison_value is None:
4587                     raise ValueError(
4588                         'Invalid integer value %r in filter part %r' % (
4589                             m.group('intval'), filter_part))
4590         if actual_value is None:
4591             return m.group('none_inclusive')
4592         return op(actual_value, comparison_value)
4593
4594     UNARY_OPERATORS = {
4595         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4596         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4597     }
4598     operator_rex = re.compile(r'''(?x)\s*
4599         (?P<op>%s)\s*(?P<key>[a-z_]+)
4600         \s*$
4601         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4602     m = operator_rex.search(filter_part)
4603     if m:
4604         op = UNARY_OPERATORS[m.group('op')]
4605         actual_value = dct.get(m.group('key'))
4606         return op(actual_value)
4607
4608     raise ValueError('Invalid filter part %r' % filter_part)
4609
4610
4611 def match_str(filter_str, dct):
4612     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
4613
4614     return all(
4615         _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
4616
4617
4618 def match_filter_func(filter_str):
4619     def _match_func(info_dict):
4620         if match_str(filter_str, info_dict):
4621             return None
4622         else:
4623             video_title = info_dict.get('title', info_dict.get('id', 'video'))
4624             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4625     return _match_func
4626
4627
4628 def parse_dfxp_time_expr(time_expr):
4629     if not time_expr:
4630         return
4631
4632     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4633     if mobj:
4634         return float(mobj.group('time_offset'))
4635
4636     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
4637     if mobj:
4638         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
4639
4640
4641 def srt_subtitles_timecode(seconds):
4642     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
4643
4644
4645 def dfxp2srt(dfxp_data):
4646     '''
4647     @param dfxp_data A bytes-like object containing DFXP data
4648     @returns A unicode object containing converted SRT data
4649     '''
4650     LEGACY_NAMESPACES = (
4651         (b'http://www.w3.org/ns/ttml', [
4652             b'http://www.w3.org/2004/11/ttaf1',
4653             b'http://www.w3.org/2006/04/ttaf1',
4654             b'http://www.w3.org/2006/10/ttaf1',
4655         ]),
4656         (b'http://www.w3.org/ns/ttml#styling', [
4657             b'http://www.w3.org/ns/ttml#style',
4658         ]),
4659     )
4660
4661     SUPPORTED_STYLING = [
4662         'color',
4663         'fontFamily',
4664         'fontSize',
4665         'fontStyle',
4666         'fontWeight',
4667         'textDecoration'
4668     ]
4669
4670     _x = functools.partial(xpath_with_ns, ns_map={
4671         'xml': 'http://www.w3.org/XML/1998/namespace',
4672         'ttml': 'http://www.w3.org/ns/ttml',
4673         'tts': 'http://www.w3.org/ns/ttml#styling',
4674     })
4675
4676     styles = {}
4677     default_style = {}
4678
4679     class TTMLPElementParser(object):
4680         _out = ''
4681         _unclosed_elements = []
4682         _applied_styles = []
4683
4684         def start(self, tag, attrib):
4685             if tag in (_x('ttml:br'), 'br'):
4686                 self._out += '\n'
4687             else:
4688                 unclosed_elements = []
4689                 style = {}
4690                 element_style_id = attrib.get('style')
4691                 if default_style:
4692                     style.update(default_style)
4693                 if element_style_id:
4694                     style.update(styles.get(element_style_id, {}))
4695                 for prop in SUPPORTED_STYLING:
4696                     prop_val = attrib.get(_x('tts:' + prop))
4697                     if prop_val:
4698                         style[prop] = prop_val
4699                 if style:
4700                     font = ''
4701                     for k, v in sorted(style.items()):
4702                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
4703                             continue
4704                         if k == 'color':
4705                             font += ' color="%s"' % v
4706                         elif k == 'fontSize':
4707                             font += ' size="%s"' % v
4708                         elif k == 'fontFamily':
4709                             font += ' face="%s"' % v
4710                         elif k == 'fontWeight' and v == 'bold':
4711                             self._out += '<b>'
4712                             unclosed_elements.append('b')
4713                         elif k == 'fontStyle' and v == 'italic':
4714                             self._out += '<i>'
4715                             unclosed_elements.append('i')
4716                         elif k == 'textDecoration' and v == 'underline':
4717                             self._out += '<u>'
4718                             unclosed_elements.append('u')
4719                     if font:
4720                         self._out += '<font' + font + '>'
4721                         unclosed_elements.append('font')
4722                     applied_style = {}
4723                     if self._applied_styles:
4724                         applied_style.update(self._applied_styles[-1])
4725                     applied_style.update(style)
4726                     self._applied_styles.append(applied_style)
4727                 self._unclosed_elements.append(unclosed_elements)
4728
4729         def end(self, tag):
4730             if tag not in (_x('ttml:br'), 'br'):
4731                 unclosed_elements = self._unclosed_elements.pop()
4732                 for element in reversed(unclosed_elements):
4733                     self._out += '</%s>' % element
4734                 if unclosed_elements and self._applied_styles:
4735                     self._applied_styles.pop()
4736
4737         def data(self, data):
4738             self._out += data
4739
4740         def close(self):
4741             return self._out.strip()
4742
4743     def parse_node(node):
4744         target = TTMLPElementParser()
4745         parser = xml.etree.ElementTree.XMLParser(target=target)
4746         parser.feed(xml.etree.ElementTree.tostring(node))
4747         return parser.close()
4748
4749     for k, v in LEGACY_NAMESPACES:
4750         for ns in v:
4751             dfxp_data = dfxp_data.replace(ns, k)
4752
4753     dfxp = compat_etree_fromstring(dfxp_data)
4754     out = []
4755     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4756
4757     if not paras:
4758         raise ValueError('Invalid dfxp/TTML subtitle')
4759
4760     repeat = False
4761     while True:
4762         for style in dfxp.findall(_x('.//ttml:style')):
4763             style_id = style.get('id') or style.get(_x('xml:id'))
4764             if not style_id:
4765                 continue
4766             parent_style_id = style.get('style')
4767             if parent_style_id:
4768                 if parent_style_id not in styles:
4769                     repeat = True
4770                     continue
4771                 styles[style_id] = styles[parent_style_id].copy()
4772             for prop in SUPPORTED_STYLING:
4773                 prop_val = style.get(_x('tts:' + prop))
4774                 if prop_val:
4775                     styles.setdefault(style_id, {})[prop] = prop_val
4776         if repeat:
4777             repeat = False
4778         else:
4779             break
4780
4781     for p in ('body', 'div'):
4782         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4783         if ele is None:
4784             continue
4785         style = styles.get(ele.get('style'))
4786         if not style:
4787             continue
4788         default_style.update(style)
4789
4790     for para, index in zip(paras, itertools.count(1)):
4791         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4792         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4793         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4794         if begin_time is None:
4795             continue
4796         if not end_time:
4797             if not dur:
4798                 continue
4799             end_time = begin_time + dur
4800         out.append('%d\n%s --> %s\n%s\n\n' % (
4801             index,
4802             srt_subtitles_timecode(begin_time),
4803             srt_subtitles_timecode(end_time),
4804             parse_node(para)))
4805
4806     return ''.join(out)
4807
4808
4809 def cli_option(params, command_option, param):
4810     param = params.get(param)
4811     if param:
4812         param = compat_str(param)
4813     return [command_option, param] if param is not None else []
4814
4815
4816 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4817     param = params.get(param)
4818     if param is None:
4819         return []
4820     assert isinstance(param, bool)
4821     if separator:
4822         return [command_option + separator + (true_value if param else false_value)]
4823     return [command_option, true_value if param else false_value]
4824
4825
4826 def cli_valueless_option(params, command_option, param, expected_value=True):
4827     param = params.get(param)
4828     return [command_option] if param == expected_value else []
4829
4830
4831 def cli_configuration_args(argdict, keys, default=[], use_compat=True):
4832     if isinstance(argdict, (list, tuple)):  # for backward compatibility
4833         if use_compat:
4834             return argdict
4835         else:
4836             argdict = None
4837     if argdict is None:
4838         return default
4839     assert isinstance(argdict, dict)
4840
4841     assert isinstance(keys, (list, tuple))
4842     for key_list in keys:
4843         if isinstance(key_list, compat_str):
4844             key_list = (key_list,)
4845         arg_list = list(filter(
4846             lambda x: x is not None,
4847             [argdict.get(key.lower()) for key in key_list]))
4848         if arg_list:
4849             return [arg for args in arg_list for arg in args]
4850     return default
4851
4852
4853 class ISO639Utils(object):
4854     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4855     _lang_map = {
4856         'aa': 'aar',
4857         'ab': 'abk',
4858         'ae': 'ave',
4859         'af': 'afr',
4860         'ak': 'aka',
4861         'am': 'amh',
4862         'an': 'arg',
4863         'ar': 'ara',
4864         'as': 'asm',
4865         'av': 'ava',
4866         'ay': 'aym',
4867         'az': 'aze',
4868         'ba': 'bak',
4869         'be': 'bel',
4870         'bg': 'bul',
4871         'bh': 'bih',
4872         'bi': 'bis',
4873         'bm': 'bam',
4874         'bn': 'ben',
4875         'bo': 'bod',
4876         'br': 'bre',
4877         'bs': 'bos',
4878         'ca': 'cat',
4879         'ce': 'che',
4880         'ch': 'cha',
4881         'co': 'cos',
4882         'cr': 'cre',
4883         'cs': 'ces',
4884         'cu': 'chu',
4885         'cv': 'chv',
4886         'cy': 'cym',
4887         'da': 'dan',
4888         'de': 'deu',
4889         'dv': 'div',
4890         'dz': 'dzo',
4891         'ee': 'ewe',
4892         'el': 'ell',
4893         'en': 'eng',
4894         'eo': 'epo',
4895         'es': 'spa',
4896         'et': 'est',
4897         'eu': 'eus',
4898         'fa': 'fas',
4899         'ff': 'ful',
4900         'fi': 'fin',
4901         'fj': 'fij',
4902         'fo': 'fao',
4903         'fr': 'fra',
4904         'fy': 'fry',
4905         'ga': 'gle',
4906         'gd': 'gla',
4907         'gl': 'glg',
4908         'gn': 'grn',
4909         'gu': 'guj',
4910         'gv': 'glv',
4911         'ha': 'hau',
4912         'he': 'heb',
4913         'iw': 'heb',  # Replaced by he in 1989 revision
4914         'hi': 'hin',
4915         'ho': 'hmo',
4916         'hr': 'hrv',
4917         'ht': 'hat',
4918         'hu': 'hun',
4919         'hy': 'hye',
4920         'hz': 'her',
4921         'ia': 'ina',
4922         'id': 'ind',
4923         'in': 'ind',  # Replaced by id in 1989 revision
4924         'ie': 'ile',
4925         'ig': 'ibo',
4926         'ii': 'iii',
4927         'ik': 'ipk',
4928         'io': 'ido',
4929         'is': 'isl',
4930         'it': 'ita',
4931         'iu': 'iku',
4932         'ja': 'jpn',
4933         'jv': 'jav',
4934         'ka': 'kat',
4935         'kg': 'kon',
4936         'ki': 'kik',
4937         'kj': 'kua',
4938         'kk': 'kaz',
4939         'kl': 'kal',
4940         'km': 'khm',
4941         'kn': 'kan',
4942         'ko': 'kor',
4943         'kr': 'kau',
4944         'ks': 'kas',
4945         'ku': 'kur',
4946         'kv': 'kom',
4947         'kw': 'cor',
4948         'ky': 'kir',
4949         'la': 'lat',
4950         'lb': 'ltz',
4951         'lg': 'lug',
4952         'li': 'lim',
4953         'ln': 'lin',
4954         'lo': 'lao',
4955         'lt': 'lit',
4956         'lu': 'lub',
4957         'lv': 'lav',
4958         'mg': 'mlg',
4959         'mh': 'mah',
4960         'mi': 'mri',
4961         'mk': 'mkd',
4962         'ml': 'mal',
4963         'mn': 'mon',
4964         'mr': 'mar',
4965         'ms': 'msa',
4966         'mt': 'mlt',
4967         'my': 'mya',
4968         'na': 'nau',
4969         'nb': 'nob',
4970         'nd': 'nde',
4971         'ne': 'nep',
4972         'ng': 'ndo',
4973         'nl': 'nld',
4974         'nn': 'nno',
4975         'no': 'nor',
4976         'nr': 'nbl',
4977         'nv': 'nav',
4978         'ny': 'nya',
4979         'oc': 'oci',
4980         'oj': 'oji',
4981         'om': 'orm',
4982         'or': 'ori',
4983         'os': 'oss',
4984         'pa': 'pan',
4985         'pi': 'pli',
4986         'pl': 'pol',
4987         'ps': 'pus',
4988         'pt': 'por',
4989         'qu': 'que',
4990         'rm': 'roh',
4991         'rn': 'run',
4992         'ro': 'ron',
4993         'ru': 'rus',
4994         'rw': 'kin',
4995         'sa': 'san',
4996         'sc': 'srd',
4997         'sd': 'snd',
4998         'se': 'sme',
4999         'sg': 'sag',
5000         'si': 'sin',
5001         'sk': 'slk',
5002         'sl': 'slv',
5003         'sm': 'smo',
5004         'sn': 'sna',
5005         'so': 'som',
5006         'sq': 'sqi',
5007         'sr': 'srp',
5008         'ss': 'ssw',
5009         'st': 'sot',
5010         'su': 'sun',
5011         'sv': 'swe',
5012         'sw': 'swa',
5013         'ta': 'tam',
5014         'te': 'tel',
5015         'tg': 'tgk',
5016         'th': 'tha',
5017         'ti': 'tir',
5018         'tk': 'tuk',
5019         'tl': 'tgl',
5020         'tn': 'tsn',
5021         'to': 'ton',
5022         'tr': 'tur',
5023         'ts': 'tso',
5024         'tt': 'tat',
5025         'tw': 'twi',
5026         'ty': 'tah',
5027         'ug': 'uig',
5028         'uk': 'ukr',
5029         'ur': 'urd',
5030         'uz': 'uzb',
5031         've': 'ven',
5032         'vi': 'vie',
5033         'vo': 'vol',
5034         'wa': 'wln',
5035         'wo': 'wol',
5036         'xh': 'xho',
5037         'yi': 'yid',
5038         'ji': 'yid',  # Replaced by yi in 1989 revision
5039         'yo': 'yor',
5040         'za': 'zha',
5041         'zh': 'zho',
5042         'zu': 'zul',
5043     }
5044
5045     @classmethod
5046     def short2long(cls, code):
5047         """Convert language code from ISO 639-1 to ISO 639-2/T"""
5048         return cls._lang_map.get(code[:2])
5049
5050     @classmethod
5051     def long2short(cls, code):
5052         """Convert language code from ISO 639-2/T to ISO 639-1"""
5053         for short_name, long_name in cls._lang_map.items():
5054             if long_name == code:
5055                 return short_name
5056
5057
5058 class ISO3166Utils(object):
5059     # From http://data.okfn.org/data/core/country-list
5060     _country_map = {
5061         'AF': 'Afghanistan',
5062         'AX': 'Åland Islands',
5063         'AL': 'Albania',
5064         'DZ': 'Algeria',
5065         'AS': 'American Samoa',
5066         'AD': 'Andorra',
5067         'AO': 'Angola',
5068         'AI': 'Anguilla',
5069         'AQ': 'Antarctica',
5070         'AG': 'Antigua and Barbuda',
5071         'AR': 'Argentina',
5072         'AM': 'Armenia',
5073         'AW': 'Aruba',
5074         'AU': 'Australia',
5075         'AT': 'Austria',
5076         'AZ': 'Azerbaijan',
5077         'BS': 'Bahamas',
5078         'BH': 'Bahrain',
5079         'BD': 'Bangladesh',
5080         'BB': 'Barbados',
5081         'BY': 'Belarus',
5082         'BE': 'Belgium',
5083         'BZ': 'Belize',
5084         'BJ': 'Benin',
5085         'BM': 'Bermuda',
5086         'BT': 'Bhutan',
5087         'BO': 'Bolivia, Plurinational State of',
5088         'BQ': 'Bonaire, Sint Eustatius and Saba',
5089         'BA': 'Bosnia and Herzegovina',
5090         'BW': 'Botswana',
5091         'BV': 'Bouvet Island',
5092         'BR': 'Brazil',
5093         'IO': 'British Indian Ocean Territory',
5094         'BN': 'Brunei Darussalam',
5095         'BG': 'Bulgaria',
5096         'BF': 'Burkina Faso',
5097         'BI': 'Burundi',
5098         'KH': 'Cambodia',
5099         'CM': 'Cameroon',
5100         'CA': 'Canada',
5101         'CV': 'Cape Verde',
5102         'KY': 'Cayman Islands',
5103         'CF': 'Central African Republic',
5104         'TD': 'Chad',
5105         'CL': 'Chile',
5106         'CN': 'China',
5107         'CX': 'Christmas Island',
5108         'CC': 'Cocos (Keeling) Islands',
5109         'CO': 'Colombia',
5110         'KM': 'Comoros',
5111         'CG': 'Congo',
5112         'CD': 'Congo, the Democratic Republic of the',
5113         'CK': 'Cook Islands',
5114         'CR': 'Costa Rica',
5115         'CI': 'Côte d\'Ivoire',
5116         'HR': 'Croatia',
5117         'CU': 'Cuba',
5118         'CW': 'Curaçao',
5119         'CY': 'Cyprus',
5120         'CZ': 'Czech Republic',
5121         'DK': 'Denmark',
5122         'DJ': 'Djibouti',
5123         'DM': 'Dominica',
5124         'DO': 'Dominican Republic',
5125         'EC': 'Ecuador',
5126         'EG': 'Egypt',
5127         'SV': 'El Salvador',
5128         'GQ': 'Equatorial Guinea',
5129         'ER': 'Eritrea',
5130         'EE': 'Estonia',
5131         'ET': 'Ethiopia',
5132         'FK': 'Falkland Islands (Malvinas)',
5133         'FO': 'Faroe Islands',
5134         'FJ': 'Fiji',
5135         'FI': 'Finland',
5136         'FR': 'France',
5137         'GF': 'French Guiana',
5138         'PF': 'French Polynesia',
5139         'TF': 'French Southern Territories',
5140         'GA': 'Gabon',
5141         'GM': 'Gambia',
5142         'GE': 'Georgia',
5143         'DE': 'Germany',
5144         'GH': 'Ghana',
5145         'GI': 'Gibraltar',
5146         'GR': 'Greece',
5147         'GL': 'Greenland',
5148         'GD': 'Grenada',
5149         'GP': 'Guadeloupe',
5150         'GU': 'Guam',
5151         'GT': 'Guatemala',
5152         'GG': 'Guernsey',
5153         'GN': 'Guinea',
5154         'GW': 'Guinea-Bissau',
5155         'GY': 'Guyana',
5156         'HT': 'Haiti',
5157         'HM': 'Heard Island and McDonald Islands',
5158         'VA': 'Holy See (Vatican City State)',
5159         'HN': 'Honduras',
5160         'HK': 'Hong Kong',
5161         'HU': 'Hungary',
5162         'IS': 'Iceland',
5163         'IN': 'India',
5164         'ID': 'Indonesia',
5165         'IR': 'Iran, Islamic Republic of',
5166         'IQ': 'Iraq',
5167         'IE': 'Ireland',
5168         'IM': 'Isle of Man',
5169         'IL': 'Israel',
5170         'IT': 'Italy',
5171         'JM': 'Jamaica',
5172         'JP': 'Japan',
5173         'JE': 'Jersey',
5174         'JO': 'Jordan',
5175         'KZ': 'Kazakhstan',
5176         'KE': 'Kenya',
5177         'KI': 'Kiribati',
5178         'KP': 'Korea, Democratic People\'s Republic of',
5179         'KR': 'Korea, Republic of',
5180         'KW': 'Kuwait',
5181         'KG': 'Kyrgyzstan',
5182         'LA': 'Lao People\'s Democratic Republic',
5183         'LV': 'Latvia',
5184         'LB': 'Lebanon',
5185         'LS': 'Lesotho',
5186         'LR': 'Liberia',
5187         'LY': 'Libya',
5188         'LI': 'Liechtenstein',
5189         'LT': 'Lithuania',
5190         'LU': 'Luxembourg',
5191         'MO': 'Macao',
5192         'MK': 'Macedonia, the Former Yugoslav Republic of',
5193         'MG': 'Madagascar',
5194         'MW': 'Malawi',
5195         'MY': 'Malaysia',
5196         'MV': 'Maldives',
5197         'ML': 'Mali',
5198         'MT': 'Malta',
5199         'MH': 'Marshall Islands',
5200         'MQ': 'Martinique',
5201         'MR': 'Mauritania',
5202         'MU': 'Mauritius',
5203         'YT': 'Mayotte',
5204         'MX': 'Mexico',
5205         'FM': 'Micronesia, Federated States of',
5206         'MD': 'Moldova, Republic of',
5207         'MC': 'Monaco',
5208         'MN': 'Mongolia',
5209         'ME': 'Montenegro',
5210         'MS': 'Montserrat',
5211         'MA': 'Morocco',
5212         'MZ': 'Mozambique',
5213         'MM': 'Myanmar',
5214         'NA': 'Namibia',
5215         'NR': 'Nauru',
5216         'NP': 'Nepal',
5217         'NL': 'Netherlands',
5218         'NC': 'New Caledonia',
5219         'NZ': 'New Zealand',
5220         'NI': 'Nicaragua',
5221         'NE': 'Niger',
5222         'NG': 'Nigeria',
5223         'NU': 'Niue',
5224         'NF': 'Norfolk Island',
5225         'MP': 'Northern Mariana Islands',
5226         'NO': 'Norway',
5227         'OM': 'Oman',
5228         'PK': 'Pakistan',
5229         'PW': 'Palau',
5230         'PS': 'Palestine, State of',
5231         'PA': 'Panama',
5232         'PG': 'Papua New Guinea',
5233         'PY': 'Paraguay',
5234         'PE': 'Peru',
5235         'PH': 'Philippines',
5236         'PN': 'Pitcairn',
5237         'PL': 'Poland',
5238         'PT': 'Portugal',
5239         'PR': 'Puerto Rico',
5240         'QA': 'Qatar',
5241         'RE': 'Réunion',
5242         'RO': 'Romania',
5243         'RU': 'Russian Federation',
5244         'RW': 'Rwanda',
5245         'BL': 'Saint Barthélemy',
5246         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5247         'KN': 'Saint Kitts and Nevis',
5248         'LC': 'Saint Lucia',
5249         'MF': 'Saint Martin (French part)',
5250         'PM': 'Saint Pierre and Miquelon',
5251         'VC': 'Saint Vincent and the Grenadines',
5252         'WS': 'Samoa',
5253         'SM': 'San Marino',
5254         'ST': 'Sao Tome and Principe',
5255         'SA': 'Saudi Arabia',
5256         'SN': 'Senegal',
5257         'RS': 'Serbia',
5258         'SC': 'Seychelles',
5259         'SL': 'Sierra Leone',
5260         'SG': 'Singapore',
5261         'SX': 'Sint Maarten (Dutch part)',
5262         'SK': 'Slovakia',
5263         'SI': 'Slovenia',
5264         'SB': 'Solomon Islands',
5265         'SO': 'Somalia',
5266         'ZA': 'South Africa',
5267         'GS': 'South Georgia and the South Sandwich Islands',
5268         'SS': 'South Sudan',
5269         'ES': 'Spain',
5270         'LK': 'Sri Lanka',
5271         'SD': 'Sudan',
5272         'SR': 'Suriname',
5273         'SJ': 'Svalbard and Jan Mayen',
5274         'SZ': 'Swaziland',
5275         'SE': 'Sweden',
5276         'CH': 'Switzerland',
5277         'SY': 'Syrian Arab Republic',
5278         'TW': 'Taiwan, Province of China',
5279         'TJ': 'Tajikistan',
5280         'TZ': 'Tanzania, United Republic of',
5281         'TH': 'Thailand',
5282         'TL': 'Timor-Leste',
5283         'TG': 'Togo',
5284         'TK': 'Tokelau',
5285         'TO': 'Tonga',
5286         'TT': 'Trinidad and Tobago',
5287         'TN': 'Tunisia',
5288         'TR': 'Turkey',
5289         'TM': 'Turkmenistan',
5290         'TC': 'Turks and Caicos Islands',
5291         'TV': 'Tuvalu',
5292         'UG': 'Uganda',
5293         'UA': 'Ukraine',
5294         'AE': 'United Arab Emirates',
5295         'GB': 'United Kingdom',
5296         'US': 'United States',
5297         'UM': 'United States Minor Outlying Islands',
5298         'UY': 'Uruguay',
5299         'UZ': 'Uzbekistan',
5300         'VU': 'Vanuatu',
5301         'VE': 'Venezuela, Bolivarian Republic of',
5302         'VN': 'Viet Nam',
5303         'VG': 'Virgin Islands, British',
5304         'VI': 'Virgin Islands, U.S.',
5305         'WF': 'Wallis and Futuna',
5306         'EH': 'Western Sahara',
5307         'YE': 'Yemen',
5308         'ZM': 'Zambia',
5309         'ZW': 'Zimbabwe',
5310     }
5311
5312     @classmethod
5313     def short2full(cls, code):
5314         """Convert an ISO 3166-2 country code to the corresponding full name"""
5315         return cls._country_map.get(code.upper())
5316
5317
5318 class GeoUtils(object):
5319     # Major IPv4 address blocks per country
5320     _country_ip_map = {
5321         'AD': '46.172.224.0/19',
5322         'AE': '94.200.0.0/13',
5323         'AF': '149.54.0.0/17',
5324         'AG': '209.59.64.0/18',
5325         'AI': '204.14.248.0/21',
5326         'AL': '46.99.0.0/16',
5327         'AM': '46.70.0.0/15',
5328         'AO': '105.168.0.0/13',
5329         'AP': '182.50.184.0/21',
5330         'AQ': '23.154.160.0/24',
5331         'AR': '181.0.0.0/12',
5332         'AS': '202.70.112.0/20',
5333         'AT': '77.116.0.0/14',
5334         'AU': '1.128.0.0/11',
5335         'AW': '181.41.0.0/18',
5336         'AX': '185.217.4.0/22',
5337         'AZ': '5.197.0.0/16',
5338         'BA': '31.176.128.0/17',
5339         'BB': '65.48.128.0/17',
5340         'BD': '114.130.0.0/16',
5341         'BE': '57.0.0.0/8',
5342         'BF': '102.178.0.0/15',
5343         'BG': '95.42.0.0/15',
5344         'BH': '37.131.0.0/17',
5345         'BI': '154.117.192.0/18',
5346         'BJ': '137.255.0.0/16',
5347         'BL': '185.212.72.0/23',
5348         'BM': '196.12.64.0/18',
5349         'BN': '156.31.0.0/16',
5350         'BO': '161.56.0.0/16',
5351         'BQ': '161.0.80.0/20',
5352         'BR': '191.128.0.0/12',
5353         'BS': '24.51.64.0/18',
5354         'BT': '119.2.96.0/19',
5355         'BW': '168.167.0.0/16',
5356         'BY': '178.120.0.0/13',
5357         'BZ': '179.42.192.0/18',
5358         'CA': '99.224.0.0/11',
5359         'CD': '41.243.0.0/16',
5360         'CF': '197.242.176.0/21',
5361         'CG': '160.113.0.0/16',
5362         'CH': '85.0.0.0/13',
5363         'CI': '102.136.0.0/14',
5364         'CK': '202.65.32.0/19',
5365         'CL': '152.172.0.0/14',
5366         'CM': '102.244.0.0/14',
5367         'CN': '36.128.0.0/10',
5368         'CO': '181.240.0.0/12',
5369         'CR': '201.192.0.0/12',
5370         'CU': '152.206.0.0/15',
5371         'CV': '165.90.96.0/19',
5372         'CW': '190.88.128.0/17',
5373         'CY': '31.153.0.0/16',
5374         'CZ': '88.100.0.0/14',
5375         'DE': '53.0.0.0/8',
5376         'DJ': '197.241.0.0/17',
5377         'DK': '87.48.0.0/12',
5378         'DM': '192.243.48.0/20',
5379         'DO': '152.166.0.0/15',
5380         'DZ': '41.96.0.0/12',
5381         'EC': '186.68.0.0/15',
5382         'EE': '90.190.0.0/15',
5383         'EG': '156.160.0.0/11',
5384         'ER': '196.200.96.0/20',
5385         'ES': '88.0.0.0/11',
5386         'ET': '196.188.0.0/14',
5387         'EU': '2.16.0.0/13',
5388         'FI': '91.152.0.0/13',
5389         'FJ': '144.120.0.0/16',
5390         'FK': '80.73.208.0/21',
5391         'FM': '119.252.112.0/20',
5392         'FO': '88.85.32.0/19',
5393         'FR': '90.0.0.0/9',
5394         'GA': '41.158.0.0/15',
5395         'GB': '25.0.0.0/8',
5396         'GD': '74.122.88.0/21',
5397         'GE': '31.146.0.0/16',
5398         'GF': '161.22.64.0/18',
5399         'GG': '62.68.160.0/19',
5400         'GH': '154.160.0.0/12',
5401         'GI': '95.164.0.0/16',
5402         'GL': '88.83.0.0/19',
5403         'GM': '160.182.0.0/15',
5404         'GN': '197.149.192.0/18',
5405         'GP': '104.250.0.0/19',
5406         'GQ': '105.235.224.0/20',
5407         'GR': '94.64.0.0/13',
5408         'GT': '168.234.0.0/16',
5409         'GU': '168.123.0.0/16',
5410         'GW': '197.214.80.0/20',
5411         'GY': '181.41.64.0/18',
5412         'HK': '113.252.0.0/14',
5413         'HN': '181.210.0.0/16',
5414         'HR': '93.136.0.0/13',
5415         'HT': '148.102.128.0/17',
5416         'HU': '84.0.0.0/14',
5417         'ID': '39.192.0.0/10',
5418         'IE': '87.32.0.0/12',
5419         'IL': '79.176.0.0/13',
5420         'IM': '5.62.80.0/20',
5421         'IN': '117.192.0.0/10',
5422         'IO': '203.83.48.0/21',
5423         'IQ': '37.236.0.0/14',
5424         'IR': '2.176.0.0/12',
5425         'IS': '82.221.0.0/16',
5426         'IT': '79.0.0.0/10',
5427         'JE': '87.244.64.0/18',
5428         'JM': '72.27.0.0/17',
5429         'JO': '176.29.0.0/16',
5430         'JP': '133.0.0.0/8',
5431         'KE': '105.48.0.0/12',
5432         'KG': '158.181.128.0/17',
5433         'KH': '36.37.128.0/17',
5434         'KI': '103.25.140.0/22',
5435         'KM': '197.255.224.0/20',
5436         'KN': '198.167.192.0/19',
5437         'KP': '175.45.176.0/22',
5438         'KR': '175.192.0.0/10',
5439         'KW': '37.36.0.0/14',
5440         'KY': '64.96.0.0/15',
5441         'KZ': '2.72.0.0/13',
5442         'LA': '115.84.64.0/18',
5443         'LB': '178.135.0.0/16',
5444         'LC': '24.92.144.0/20',
5445         'LI': '82.117.0.0/19',
5446         'LK': '112.134.0.0/15',
5447         'LR': '102.183.0.0/16',
5448         'LS': '129.232.0.0/17',
5449         'LT': '78.56.0.0/13',
5450         'LU': '188.42.0.0/16',
5451         'LV': '46.109.0.0/16',
5452         'LY': '41.252.0.0/14',
5453         'MA': '105.128.0.0/11',
5454         'MC': '88.209.64.0/18',
5455         'MD': '37.246.0.0/16',
5456         'ME': '178.175.0.0/17',
5457         'MF': '74.112.232.0/21',
5458         'MG': '154.126.0.0/17',
5459         'MH': '117.103.88.0/21',
5460         'MK': '77.28.0.0/15',
5461         'ML': '154.118.128.0/18',
5462         'MM': '37.111.0.0/17',
5463         'MN': '49.0.128.0/17',
5464         'MO': '60.246.0.0/16',
5465         'MP': '202.88.64.0/20',
5466         'MQ': '109.203.224.0/19',
5467         'MR': '41.188.64.0/18',
5468         'MS': '208.90.112.0/22',
5469         'MT': '46.11.0.0/16',
5470         'MU': '105.16.0.0/12',
5471         'MV': '27.114.128.0/18',
5472         'MW': '102.70.0.0/15',
5473         'MX': '187.192.0.0/11',
5474         'MY': '175.136.0.0/13',
5475         'MZ': '197.218.0.0/15',
5476         'NA': '41.182.0.0/16',
5477         'NC': '101.101.0.0/18',
5478         'NE': '197.214.0.0/18',
5479         'NF': '203.17.240.0/22',
5480         'NG': '105.112.0.0/12',
5481         'NI': '186.76.0.0/15',
5482         'NL': '145.96.0.0/11',
5483         'NO': '84.208.0.0/13',
5484         'NP': '36.252.0.0/15',
5485         'NR': '203.98.224.0/19',
5486         'NU': '49.156.48.0/22',
5487         'NZ': '49.224.0.0/14',
5488         'OM': '5.36.0.0/15',
5489         'PA': '186.72.0.0/15',
5490         'PE': '186.160.0.0/14',
5491         'PF': '123.50.64.0/18',
5492         'PG': '124.240.192.0/19',
5493         'PH': '49.144.0.0/13',
5494         'PK': '39.32.0.0/11',
5495         'PL': '83.0.0.0/11',
5496         'PM': '70.36.0.0/20',
5497         'PR': '66.50.0.0/16',
5498         'PS': '188.161.0.0/16',
5499         'PT': '85.240.0.0/13',
5500         'PW': '202.124.224.0/20',
5501         'PY': '181.120.0.0/14',
5502         'QA': '37.210.0.0/15',
5503         'RE': '102.35.0.0/16',
5504         'RO': '79.112.0.0/13',
5505         'RS': '93.86.0.0/15',
5506         'RU': '5.136.0.0/13',
5507         'RW': '41.186.0.0/16',
5508         'SA': '188.48.0.0/13',
5509         'SB': '202.1.160.0/19',
5510         'SC': '154.192.0.0/11',
5511         'SD': '102.120.0.0/13',
5512         'SE': '78.64.0.0/12',
5513         'SG': '8.128.0.0/10',
5514         'SI': '188.196.0.0/14',
5515         'SK': '78.98.0.0/15',
5516         'SL': '102.143.0.0/17',
5517         'SM': '89.186.32.0/19',
5518         'SN': '41.82.0.0/15',
5519         'SO': '154.115.192.0/18',
5520         'SR': '186.179.128.0/17',
5521         'SS': '105.235.208.0/21',
5522         'ST': '197.159.160.0/19',
5523         'SV': '168.243.0.0/16',
5524         'SX': '190.102.0.0/20',
5525         'SY': '5.0.0.0/16',
5526         'SZ': '41.84.224.0/19',
5527         'TC': '65.255.48.0/20',
5528         'TD': '154.68.128.0/19',
5529         'TG': '196.168.0.0/14',
5530         'TH': '171.96.0.0/13',
5531         'TJ': '85.9.128.0/18',
5532         'TK': '27.96.24.0/21',
5533         'TL': '180.189.160.0/20',
5534         'TM': '95.85.96.0/19',
5535         'TN': '197.0.0.0/11',
5536         'TO': '175.176.144.0/21',
5537         'TR': '78.160.0.0/11',
5538         'TT': '186.44.0.0/15',
5539         'TV': '202.2.96.0/19',
5540         'TW': '120.96.0.0/11',
5541         'TZ': '156.156.0.0/14',
5542         'UA': '37.52.0.0/14',
5543         'UG': '102.80.0.0/13',
5544         'US': '6.0.0.0/8',
5545         'UY': '167.56.0.0/13',
5546         'UZ': '84.54.64.0/18',
5547         'VA': '212.77.0.0/19',
5548         'VC': '207.191.240.0/21',
5549         'VE': '186.88.0.0/13',
5550         'VG': '66.81.192.0/20',
5551         'VI': '146.226.0.0/16',
5552         'VN': '14.160.0.0/11',
5553         'VU': '202.80.32.0/20',
5554         'WF': '117.20.32.0/21',
5555         'WS': '202.4.32.0/19',
5556         'YE': '134.35.0.0/16',
5557         'YT': '41.242.116.0/22',
5558         'ZA': '41.0.0.0/11',
5559         'ZM': '102.144.0.0/13',
5560         'ZW': '102.177.192.0/18',
5561     }
5562
5563     @classmethod
5564     def random_ipv4(cls, code_or_block):
5565         if len(code_or_block) == 2:
5566             block = cls._country_ip_map.get(code_or_block.upper())
5567             if not block:
5568                 return None
5569         else:
5570             block = code_or_block
5571         addr, preflen = block.split('/')
5572         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5573         addr_max = addr_min | (0xffffffff >> int(preflen))
5574         return compat_str(socket.inet_ntoa(
5575             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5576
5577
5578 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
5579     def __init__(self, proxies=None):
5580         # Set default handlers
5581         for type in ('http', 'https'):
5582             setattr(self, '%s_open' % type,
5583                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5584                         meth(r, proxy, type))
5585         compat_urllib_request.ProxyHandler.__init__(self, proxies)
5586
5587     def proxy_open(self, req, proxy, type):
5588         req_proxy = req.headers.get('Ytdl-request-proxy')
5589         if req_proxy is not None:
5590             proxy = req_proxy
5591             del req.headers['Ytdl-request-proxy']
5592
5593         if proxy == '__noproxy__':
5594             return None  # No Proxy
5595         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
5596             req.add_header('Ytdl-socks-proxy', proxy)
5597             # yt-dlp's http/https handlers do wrapping the socket with socks
5598             return None
5599         return compat_urllib_request.ProxyHandler.proxy_open(
5600             self, req, proxy, type)
5601
5602
5603 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5604 # released into Public Domain
5605 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5606
5607 def long_to_bytes(n, blocksize=0):
5608     """long_to_bytes(n:long, blocksize:int) : string
5609     Convert a long integer to a byte string.
5610
5611     If optional blocksize is given and greater than zero, pad the front of the
5612     byte string with binary zeros so that the length is a multiple of
5613     blocksize.
5614     """
5615     # after much testing, this algorithm was deemed to be the fastest
5616     s = b''
5617     n = int(n)
5618     while n > 0:
5619         s = compat_struct_pack('>I', n & 0xffffffff) + s
5620         n = n >> 32
5621     # strip off leading zeros
5622     for i in range(len(s)):
5623         if s[i] != b'\000'[0]:
5624             break
5625     else:
5626         # only happens when n == 0
5627         s = b'\000'
5628         i = 0
5629     s = s[i:]
5630     # add back some pad bytes.  this could be done more efficiently w.r.t. the
5631     # de-padding being done above, but sigh...
5632     if blocksize > 0 and len(s) % blocksize:
5633         s = (blocksize - len(s) % blocksize) * b'\000' + s
5634     return s
5635
5636
5637 def bytes_to_long(s):
5638     """bytes_to_long(string) : long
5639     Convert a byte string to a long integer.
5640
5641     This is (essentially) the inverse of long_to_bytes().
5642     """
5643     acc = 0
5644     length = len(s)
5645     if length % 4:
5646         extra = (4 - length % 4)
5647         s = b'\000' * extra + s
5648         length = length + extra
5649     for i in range(0, length, 4):
5650         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5651     return acc
5652
5653
5654 def ohdave_rsa_encrypt(data, exponent, modulus):
5655     '''
5656     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5657
5658     Input:
5659         data: data to encrypt, bytes-like object
5660         exponent, modulus: parameter e and N of RSA algorithm, both integer
5661     Output: hex string of encrypted data
5662
5663     Limitation: supports one block encryption only
5664     '''
5665
5666     payload = int(binascii.hexlify(data[::-1]), 16)
5667     encrypted = pow(payload, exponent, modulus)
5668     return '%x' % encrypted
5669
5670
5671 def pkcs1pad(data, length):
5672     """
5673     Padding input data with PKCS#1 scheme
5674
5675     @param {int[]} data        input data
5676     @param {int}   length      target length
5677     @returns {int[]}           padded data
5678     """
5679     if len(data) > length - 11:
5680         raise ValueError('Input data too long for PKCS#1 padding')
5681
5682     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5683     return [0, 2] + pseudo_random + [0] + data
5684
5685
5686 def encode_base_n(num, n, table=None):
5687     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5688     if not table:
5689         table = FULL_TABLE[:n]
5690
5691     if n > len(table):
5692         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5693
5694     if num == 0:
5695         return table[0]
5696
5697     ret = ''
5698     while num:
5699         ret = table[num % n] + ret
5700         num = num // n
5701     return ret
5702
5703
5704 def decode_packed_codes(code):
5705     mobj = re.search(PACKED_CODES_RE, code)
5706     obfuscated_code, base, count, symbols = mobj.groups()
5707     base = int(base)
5708     count = int(count)
5709     symbols = symbols.split('|')
5710     symbol_table = {}
5711
5712     while count:
5713         count -= 1
5714         base_n_count = encode_base_n(count, base)
5715         symbol_table[base_n_count] = symbols[count] or base_n_count
5716
5717     return re.sub(
5718         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
5719         obfuscated_code)
5720
5721
5722 def caesar(s, alphabet, shift):
5723     if shift == 0:
5724         return s
5725     l = len(alphabet)
5726     return ''.join(
5727         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5728         for c in s)
5729
5730
5731 def rot47(s):
5732     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5733
5734
5735 def parse_m3u8_attributes(attrib):
5736     info = {}
5737     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5738         if val.startswith('"'):
5739             val = val[1:-1]
5740         info[key] = val
5741     return info
5742
5743
5744 def urshift(val, n):
5745     return val >> n if val >= 0 else (val + 0x100000000) >> n
5746
5747
5748 # Based on png2str() written by @gdkchan and improved by @yokrysty
5749 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5750 def decode_png(png_data):
5751     # Reference: https://www.w3.org/TR/PNG/
5752     header = png_data[8:]
5753
5754     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5755         raise IOError('Not a valid PNG file.')
5756
5757     int_map = {1: '>B', 2: '>H', 4: '>I'}
5758     unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5759
5760     chunks = []
5761
5762     while header:
5763         length = unpack_integer(header[:4])
5764         header = header[4:]
5765
5766         chunk_type = header[:4]
5767         header = header[4:]
5768
5769         chunk_data = header[:length]
5770         header = header[length:]
5771
5772         header = header[4:]  # Skip CRC
5773
5774         chunks.append({
5775             'type': chunk_type,
5776             'length': length,
5777             'data': chunk_data
5778         })
5779
5780     ihdr = chunks[0]['data']
5781
5782     width = unpack_integer(ihdr[:4])
5783     height = unpack_integer(ihdr[4:8])
5784
5785     idat = b''
5786
5787     for chunk in chunks:
5788         if chunk['type'] == b'IDAT':
5789             idat += chunk['data']
5790
5791     if not idat:
5792         raise IOError('Unable to read PNG data.')
5793
5794     decompressed_data = bytearray(zlib.decompress(idat))
5795
5796     stride = width * 3
5797     pixels = []
5798
5799     def _get_pixel(idx):
5800         x = idx % stride
5801         y = idx // stride
5802         return pixels[y][x]
5803
5804     for y in range(height):
5805         basePos = y * (1 + stride)
5806         filter_type = decompressed_data[basePos]
5807
5808         current_row = []
5809
5810         pixels.append(current_row)
5811
5812         for x in range(stride):
5813             color = decompressed_data[1 + basePos + x]
5814             basex = y * stride + x
5815             left = 0
5816             up = 0
5817
5818             if x > 2:
5819                 left = _get_pixel(basex - 3)
5820             if y > 0:
5821                 up = _get_pixel(basex - stride)
5822
5823             if filter_type == 1:  # Sub
5824                 color = (color + left) & 0xff
5825             elif filter_type == 2:  # Up
5826                 color = (color + up) & 0xff
5827             elif filter_type == 3:  # Average
5828                 color = (color + ((left + up) >> 1)) & 0xff
5829             elif filter_type == 4:  # Paeth
5830                 a = left
5831                 b = up
5832                 c = 0
5833
5834                 if x > 2 and y > 0:
5835                     c = _get_pixel(basex - stride - 3)
5836
5837                 p = a + b - c
5838
5839                 pa = abs(p - a)
5840                 pb = abs(p - b)
5841                 pc = abs(p - c)
5842
5843                 if pa <= pb and pa <= pc:
5844                     color = (color + a) & 0xff
5845                 elif pb <= pc:
5846                     color = (color + b) & 0xff
5847                 else:
5848                     color = (color + c) & 0xff
5849
5850             current_row.append(color)
5851
5852     return width, height, pixels
5853
5854
5855 def write_xattr(path, key, value):
5856     # This mess below finds the best xattr tool for the job
5857     try:
5858         # try the pyxattr module...
5859         import xattr
5860
5861         if hasattr(xattr, 'set'):  # pyxattr
5862             # Unicode arguments are not supported in python-pyxattr until
5863             # version 0.5.0
5864             # See https://github.com/ytdl-org/youtube-dl/issues/5498
5865             pyxattr_required_version = '0.5.0'
5866             if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5867                 # TODO: fallback to CLI tools
5868                 raise XAttrUnavailableError(
5869                     'python-pyxattr is detected but is too old. '
5870                     'yt-dlp requires %s or above while your version is %s. '
5871                     'Falling back to other xattr implementations' % (
5872                         pyxattr_required_version, xattr.__version__))
5873
5874             setxattr = xattr.set
5875         else:  # xattr
5876             setxattr = xattr.setxattr
5877
5878         try:
5879             setxattr(path, key, value)
5880         except EnvironmentError as e:
5881             raise XAttrMetadataError(e.errno, e.strerror)
5882
5883     except ImportError:
5884         if compat_os_name == 'nt':
5885             # Write xattrs to NTFS Alternate Data Streams:
5886             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5887             assert ':' not in key
5888             assert os.path.exists(path)
5889
5890             ads_fn = path + ':' + key
5891             try:
5892                 with open(ads_fn, 'wb') as f:
5893                     f.write(value)
5894             except EnvironmentError as e:
5895                 raise XAttrMetadataError(e.errno, e.strerror)
5896         else:
5897             user_has_setfattr = check_executable('setfattr', ['--version'])
5898             user_has_xattr = check_executable('xattr', ['-h'])
5899
5900             if user_has_setfattr or user_has_xattr:
5901
5902                 value = value.decode('utf-8')
5903                 if user_has_setfattr:
5904                     executable = 'setfattr'
5905                     opts = ['-n', key, '-v', value]
5906                 elif user_has_xattr:
5907                     executable = 'xattr'
5908                     opts = ['-w', key, value]
5909
5910                 cmd = ([encodeFilename(executable, True)]
5911                        + [encodeArgument(o) for o in opts]
5912                        + [encodeFilename(path, True)])
5913
5914                 try:
5915                     p = subprocess.Popen(
5916                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5917                 except EnvironmentError as e:
5918                     raise XAttrMetadataError(e.errno, e.strerror)
5919                 stdout, stderr = process_communicate_or_kill(p)
5920                 stderr = stderr.decode('utf-8', 'replace')
5921                 if p.returncode != 0:
5922                     raise XAttrMetadataError(p.returncode, stderr)
5923
5924             else:
5925                 # On Unix, and can't find pyxattr, setfattr, or xattr.
5926                 if sys.platform.startswith('linux'):
5927                     raise XAttrUnavailableError(
5928                         "Couldn't find a tool to set the xattrs. "
5929                         "Install either the python 'pyxattr' or 'xattr' "
5930                         "modules, or the GNU 'attr' package "
5931                         "(which contains the 'setfattr' tool).")
5932                 else:
5933                     raise XAttrUnavailableError(
5934                         "Couldn't find a tool to set the xattrs. "
5935                         "Install either the python 'xattr' module, "
5936                         "or the 'xattr' binary.")
5937
5938
5939 def random_birthday(year_field, month_field, day_field):
5940     start_date = datetime.date(1950, 1, 1)
5941     end_date = datetime.date(1995, 12, 31)
5942     offset = random.randint(0, (end_date - start_date).days)
5943     random_date = start_date + datetime.timedelta(offset)
5944     return {
5945         year_field: str(random_date.year),
5946         month_field: str(random_date.month),
5947         day_field: str(random_date.day),
5948     }
5949
5950
5951 # Templates for internet shortcut files, which are plain text files.
5952 DOT_URL_LINK_TEMPLATE = '''
5953 [InternetShortcut]
5954 URL=%(url)s
5955 '''.lstrip()
5956
5957 DOT_WEBLOC_LINK_TEMPLATE = '''
5958 <?xml version="1.0" encoding="UTF-8"?>
5959 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5960 <plist version="1.0">
5961 <dict>
5962 \t<key>URL</key>
5963 \t<string>%(url)s</string>
5964 </dict>
5965 </plist>
5966 '''.lstrip()
5967
5968 DOT_DESKTOP_LINK_TEMPLATE = '''
5969 [Desktop Entry]
5970 Encoding=UTF-8
5971 Name=%(filename)s
5972 Type=Link
5973 URL=%(url)s
5974 Icon=text-html
5975 '''.lstrip()
5976
5977
5978 def iri_to_uri(iri):
5979     """
5980     Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5981
5982     The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5983     """
5984
5985     iri_parts = compat_urllib_parse_urlparse(iri)
5986
5987     if '[' in iri_parts.netloc:
5988         raise ValueError('IPv6 URIs are not, yet, supported.')
5989         # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5990
5991     # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5992
5993     net_location = ''
5994     if iri_parts.username:
5995         net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
5996         if iri_parts.password is not None:
5997             net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
5998         net_location += '@'
5999
6000     net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
6001     # The 'idna' encoding produces ASCII text.
6002     if iri_parts.port is not None and iri_parts.port != 80:
6003         net_location += ':' + str(iri_parts.port)
6004
6005     return compat_urllib_parse_urlunparse(
6006         (iri_parts.scheme,
6007             net_location,
6008
6009             compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6010
6011             # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6012             compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6013
6014             # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6015             compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6016
6017             compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6018
6019     # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6020
6021
6022 def to_high_limit_path(path):
6023     if sys.platform in ['win32', 'cygwin']:
6024         # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6025         return r'\\?\ '.rstrip() + os.path.abspath(path)
6026
6027     return path
6028
6029
6030 def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
6031     val = obj.get(field, default)
6032     if func and val not in ignore:
6033         val = func(val)
6034     return template % val if val not in ignore else default
6035
6036
6037 def clean_podcast_url(url):
6038     return re.sub(r'''(?x)
6039         (?:
6040             (?:
6041                 chtbl\.com/track|
6042                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6043                 play\.podtrac\.com
6044             )/[^/]+|
6045             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6046             flex\.acast\.com|
6047             pd(?:
6048                 cn\.co| # https://podcorn.com/analytics-prefix/
6049                 st\.fm # https://podsights.com/docs/
6050             )/e
6051         )/''', '', url)
6052
6053
6054 _HEX_TABLE = '0123456789abcdef'
6055
6056
6057 def random_uuidv4():
6058     return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6059
6060
6061 def make_dir(path, to_screen=None):
6062     try:
6063         dn = os.path.dirname(path)
6064         if dn and not os.path.exists(dn):
6065             os.makedirs(dn)
6066         return True
6067     except (OSError, IOError) as err:
6068         if callable(to_screen) is not None:
6069             to_screen('unable to create directory ' + error_to_compat_str(err))
6070         return False
6071
6072
6073 def get_executable_path():
6074     from zipimport import zipimporter
6075     if hasattr(sys, 'frozen'):  # Running from PyInstaller
6076         path = os.path.dirname(sys.executable)
6077     elif isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
6078         path = os.path.join(os.path.dirname(__file__), '../..')
6079     else:
6080         path = os.path.join(os.path.dirname(__file__), '..')
6081     return os.path.abspath(path)
6082
6083
6084 def load_plugins(name, suffix, namespace):
6085     plugin_info = [None]
6086     classes = []
6087     try:
6088         plugin_info = imp.find_module(
6089             name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
6090         plugins = imp.load_module(name, *plugin_info)
6091         for name in dir(plugins):
6092             if name in namespace:
6093                 continue
6094             if not name.endswith(suffix):
6095                 continue
6096             klass = getattr(plugins, name)
6097             classes.append(klass)
6098             namespace[name] = klass
6099     except ImportError:
6100         pass
6101     finally:
6102         if plugin_info[0] is not None:
6103             plugin_info[0].close()
6104     return classes
6105
6106
6107 def traverse_dict(dictn, keys, casesense=True):
6108     keys = list(keys)[::-1]
6109     while keys:
6110         key = keys.pop()
6111         if isinstance(dictn, dict):
6112             if not casesense:
6113                 dictn = {k.lower(): v for k, v in dictn.items()}
6114                 key = key.lower()
6115             dictn = dictn.get(key)
6116         elif isinstance(dictn, (list, tuple, compat_str)):
6117             if ':' in key:
6118                 key = slice(*map(int_or_none, key.split(':')))
6119             else:
6120                 key = int_or_none(key)
6121             dictn = try_get(dictn, lambda x: x[key])
6122         else:
6123             return None
6124     return dictn