4 from __future__
import unicode_literals
37 import xml
.etree
.ElementTree
41 compat_HTMLParseError
,
46 compat_ctypes_WINFUNCTYPE
,
47 compat_etree_fromstring
,
50 compat_html_entities_html5
,
63 compat_urllib_parse_urlencode
,
64 compat_urllib_parse_urlparse
,
65 compat_urllib_parse_urlunparse
,
66 compat_urllib_parse_quote
,
67 compat_urllib_parse_quote_plus
,
68 compat_urllib_parse_unquote_plus
,
69 compat_urllib_request
,
def register_socks_protocols():
    """Teach urlparse that SOCKS schemes carry a network location.

    In Python < 2.6.5, urlsplit() suffers from
    https://bugs.python.org/issue7904: URLs whose scheme is missing from
    urlparse.uses_netloc are not handled correctly, so "register" ours.
    """
    netloc_schemes = compat_urlparse.uses_netloc
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in netloc_schemes:
            netloc_schemes.append(scheme)
# The type of a compiled regular expression is not exposed publicly in
# older Pythons, so capture it once from a throwaway pattern for use in
# isinstance() checks.
compiled_regex_type = type(re.compile(''))
93 def random_user_agent():
94 _USER_AGENT_TPL
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
1673 return _USER_AGENT_TPL
% random
.choice(_CHROME_VERSIONS
)
1677 'User-Agent': random_user_agent(),
1678 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1679 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1680 'Accept-Encoding': 'gzip, deflate',
1681 'Accept-Language': 'en-us,en;q=0.5',
1686 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
# Unique sentinel distinguishing "no default supplied" from an explicit None.
NO_DEFAULT = object()
# Full English month names, indexed 0 (January) through 11 (December).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]
1697 'en': ENGLISH_MONTH_NAMES
,
1699 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1700 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1703 KNOWN_EXTENSIONS
= (
1704 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1705 'flv', 'f4v', 'f4a', 'f4b',
1706 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1707 'mkv', 'mka', 'mk3d',
1710 'asf', 'wmv', 'wma',
1716 'f4f', 'f4m', 'm3u8', 'smil')
# Container/audio extensions accepted as remux targets
# (presumably consumed by the remux post-processor -- confirm at call sites).
REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
# Accented character -> ASCII replacement table; needed for sanitizing
# filenames in restricted mode. Single characters map via zip() over the
# key string; multi-character replacements ('AE', 'ss', ...) are injected
# as one-element lists in the chained value sequence.
ACCENT_CHARS = dict(zip(
    'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
    itertools.chain(
        'AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1746 '%Y/%m/%d %H:%M:%S',
1748 '%Y-%m-%d %H:%M:%S',
1749 '%Y-%m-%d %H:%M:%S.%f',
1752 '%Y-%m-%dT%H:%M:%SZ',
1753 '%Y-%m-%dT%H:%M:%S.%fZ',
1754 '%Y-%m-%dT%H:%M:%S.%f0Z',
1755 '%Y-%m-%dT%H:%M:%S',
1756 '%Y-%m-%dT%H:%M:%S.%f',
1758 '%b %d %Y at %H:%M',
1759 '%b %d %Y at %H:%M:%S',
1760 '%B %d %Y at %H:%M',
1761 '%B %d %Y at %H:%M:%S',
1764 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
1765 DATE_FORMATS_DAY_FIRST
.extend([
1771 '%d/%m/%Y %H:%M:%S',
1774 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
1775 DATE_FORMATS_MONTH_FIRST
.extend([
1780 '%m/%d/%Y %H:%M:%S',
# Matches the argument list of Dean Edwards' p.a.c.k.e.r eval-packed
# JavaScript: }('payload', radix, count, 'word|word|...'.split('|')
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"

# Extracts the body of a <script type="application/ld+json"> element into
# the named group 'json_ld'; \1 requires the closing quote (if any) to
# match the opening one.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1787 def preferredencoding():
1788 """Get preferred encoding.
1790 Returns the best encoding scheme for the system, based on
1791 locale.getpreferredencoding() and some further tweaks.
1794 pref = locale.getpreferredencoding()
1802 def write_json_file(obj, fn):
1803 """ Encode obj as JSON and write it to fn, atomically if possible """
1805 fn = encodeFilename(fn)
1806 if sys.version_info < (3, 0) and sys.platform != 'win32
':
1807 encoding = get_filesystem_encoding()
1808 # os.path.basename returns a bytes object, but NamedTemporaryFile
1809 # will fail if the filename contains non ascii characters unless we
1810 # use a unicode object
1811 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1812 # the same for os.path.dirname
1813 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1815 path_basename = os.path.basename
1816 path_dirname = os.path.dirname
1820 'prefix
': path_basename(fn) + '.',
1821 'dir': path_dirname(fn),
1825 # In Python 2.x, json.dump expects a bytestream.
1826 # In Python 3.x, it writes to a character stream
1827 if sys.version_info < (3, 0):
1832 'encoding
': 'utf
-8',
1835 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1839 json.dump(obj, tf, default=repr)
1840 if sys.platform == 'win32
':
1841 # Need to remove existing file on Windows, else os.rename raises
1842 # WindowsError or FileExistsError.
1850 os.chmod(tf.name, 0o666 & ~mask)
1853 os.rename(tf.name, fn)
1862 if sys.version_info >= (2, 7):
1863 def find_xpath_attr(node, xpath, key, val=None):
1864 """ Find the xpath xpath[@key=val] """
1865 assert re.match(r'^
[a
-zA
-Z_
-]+$
', key)
1866 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1867 return node.find(expr)
1869 def find_xpath_attr(node, xpath, key, val=None):
1870 for f in node.findall(compat_xpath(xpath)):
1871 if key not in f.attrib:
1873 if val is None or f.attrib.get(key) == val:
1877 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1878 # the namespace parameter
1881 def xpath_with_ns(path
, ns_map
):
1882 components
= [c
.split(':') for c
in path
.split('/')]
1884 for c
in components
:
1886 replaced
.append(c
[0])
1889 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
1890 return '/'.join(replaced
)
1893 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1894 def _find_xpath(xpath
):
1895 return node
.find(compat_xpath(xpath
))
1897 if isinstance(xpath
, (str, compat_str
)):
1898 n
= _find_xpath(xpath
)
1906 if default
is not NO_DEFAULT
:
1909 name
= xpath
if name
is None else name
1910 raise ExtractorError('Could not find XML element %s' % name
)
1916 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1917 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
1918 if n
is None or n
== default
:
1921 if default
is not NO_DEFAULT
:
1924 name
= xpath
if name
is None else name
1925 raise ExtractorError('Could not find XML element\'s text %s' % name
)
1931 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1932 n
= find_xpath_attr(node
, xpath
, key
)
1934 if default
is not NO_DEFAULT
:
1937 name
= '%s[@%s]' % (xpath
, key
) if name
is None else name
1938 raise ExtractorError('Could not find XML attribute %s' % name
)
1941 return n
.attrib
[key
]
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # NOTE: `id` shadows the builtin, but renaming it would break callers
    # that pass it by keyword.
    return get_element_by_attribute('id', id, html)
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose `attribute` equals `value`, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may hold several space-separated names, so match
    # the escaped name anywhere in the attribute value.
    # NOTE(review): \b also matches at hyphens, so 'foo' would match a
    # 'foo-bar' class -- confirm whether stricter matching is wanted.
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1967 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1968 """Return the content of the tag with the specified attribute in the passed HTML document"""
1970 value = re.escape(value) if escape_value else value
1973 for m in re.finditer(r'''(?xs)
1975 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
1977 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*?
1981 ''' % (re.escape(attribute), value), html):
1982 res = m.group('content
')
1984 if res.startswith('"') or res.startswith("'"):
1987 retlist.append(unescapeHTML(res))
1992 class HTMLAttributeParser(compat_HTMLParser):
1993 """Trivial HTML parser to gather the attributes for a single element"""
1997 compat_HTMLParser.__init__(self)
1999 def handle_starttag(self, tag, attrs):
2000 self.attrs = dict(attrs)
2003 def extract_attributes(html_element):
2004 """Given a string for an HTML element such as
2006 a="foo" B="bar" c="&98;az" d=boz
2007 empty= noval entity="&"
2010 Decode and return a dictionary of attributes.
2012 'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
',
2013 'empty
': '', 'noval
': None, 'entity
': '&',
2014 'sq
': '"', 'dq': '\''
2016 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2017 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2019 parser = HTMLAttributeParser()
2021 parser.feed(html_element)
2023 # Older Python may throw HTMLParseError in case of malformed HTML
2024 except compat_HTMLParseError:
2029 def clean_html(html):
2030 """Clean an HTML snippet into a readable string"""
2032 if html is None: # Convenience for sanitizing descriptions etc.
2036 html = html.replace('\n', ' ')
2037 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2038 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2040 html = re.sub('<.*?>', '', html)
2041 # Replace html entities
2042 html = unescapeHTML(html)
2046 def sanitize_open(filename, open_mode):
2047 """Try to open the given filename, and slightly tweak it if this fails.
2049 Attempts to open the given filename. If this fails, it tries to change
2050 the filename slightly, step by step, until it's either able to open it
2051 or it fails and raises a final exception, like the standard open()
2054 It returns the tuple (stream, definitive_file_name).
2058 if sys.platform == 'win32':
2060 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2061 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2062 stream = open(encodeFilename(filename), open_mode)
2063 return (stream, filename)
2064 except (IOError, OSError) as err:
2065 if err.errno in (errno.EACCES,):
2068 # In case of error, try to remove win32 forbidden chars
2069 alt_filename = sanitize_path(filename)
2070 if alt_filename == filename:
2073 # An exception here should be caught in the caller
2074 stream = open(encodeFilename(alt_filename), open_mode)
2075 return (stream, alt_filename)
2078 def timeconvert(timestr):
2079 """Convert RFC 2822 defined time string into system timestamp"""
2081 timetuple = email.utils.parsedate_tz(timestr)
2082 if timetuple is not None:
2083 timestamp = email.utils.mktime_tz(timetuple)
2087 def sanitize_filename(s, restricted=False, is_id=False):
2088 """Sanitizes a string so it could be used as part of a filename.
2089 If restricted is set, use a stricter subset of allowed characters.
2090 Set is_id if this is not an arbitrary string, but an ID that should be kept
2093 def replace_insane(char):
2094 if restricted and char in ACCENT_CHARS:
2095 return ACCENT_CHARS[char]
2096 if char == '?' or ord(char) < 32 or ord(char) == 127:
2099 return '' if restricted else '\''
2101 return '_
-' if restricted else ' -'
2102 elif char in '\\/|
*<>':
2104 if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()):
2106 if restricted
and ord(char
) > 127:
2111 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
2112 result
= ''.join(map(replace_insane
, s
))
2114 while '__' in result
:
2115 result
= result
.replace('__', '_')
2116 result
= result
.strip('_')
2117 # Common case of "Foreign band name - English song title"
2118 if restricted
and result
.startswith('-_'):
2120 if result
.startswith('-'):
2121 result
= '_' + result
[len('-'):]
2122 result
= result
.lstrip('.')
2128 def sanitize_path(s
, force
=False):
2129 """Sanitizes and normalizes path on Windows"""
2130 if sys
.platform
== 'win32':
2132 drive_or_unc
, _
= os
.path
.splitdrive(s
)
2133 if sys
.version_info
< (2, 7) and not drive_or_unc
:
2134 drive_or_unc
, _
= os
.path
.splitunc(s
)
2140 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
2144 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
2145 for path_part
in norm_path
]
2147 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
2148 elif force
and s
[0] == os
.path
.sep
:
2149 sanitized_path
.insert(0, os
.path
.sep
)
2150 return os
.path
.join(*sanitized_path
)
2153 def sanitize_url(url
):
2154 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2155 # the number of unwanted failures due to missing protocol
2156 if url
.startswith('//'):
2157 return 'http:%s' % url
2158 # Fix some common typos seen so far
2160 # https://github.com/ytdl-org/youtube-dl/issues/15649
2161 (r
'^httpss://', r
'https://'),
2162 # https://bx1.be/lives/direct-tv/
2163 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
2165 for mistake
, fixup
in COMMON_TYPOS
:
2166 if re
.match(mistake
, url
):
2167 return re
.sub(mistake
, fixup
, url
)
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after normalizing the URL via sanitize_url()."""
    cleaned_url = sanitize_url(url)
    return compat_urllib_request.Request(cleaned_url, *args, **kwargs)
2176 """Expand shell variables and ~"""
2177 return os
.path
.expandvars(compat_expanduser(s
))
2180 def orderedSet(iterable
):
2181 """ Remove all duplicates from the input iterable """
2189 def _htmlentity_transform(entity_with_semicolon
):
2190 """Transforms an HTML entity to a character."""
2191 entity
= entity_with_semicolon
[:-1]
2193 # Known non-numeric HTML entity
2194 if entity
in compat_html_entities
.name2codepoint
:
2195 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
2197 # TODO: HTML5 allows entities without a semicolon. For example,
2198 # 'Éric' should be decoded as 'Éric'.
2199 if entity_with_semicolon
in compat_html_entities_html5
:
2200 return compat_html_entities_html5
[entity_with_semicolon
]
2202 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
2203 if mobj
is not None:
2204 numstr
= mobj
.group(1)
2205 if numstr
.startswith('x'):
2207 numstr
= '0%s' % numstr
2210 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2212 return compat_chr(int(numstr
, base
))
2216 # Unknown entity in name, return its literal representation
2217 return '&%s;' % entity
2220 def unescapeHTML(s
):
2223 assert type(s
) == compat_str
2226 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
2229 def process_communicate_or_kill(p
, *args
, **kwargs
):
2231 return p
.communicate(*args
, **kwargs
)
2232 except BaseException
: # Including KeyboardInterrupt
2238 def get_subprocess_encoding():
2239 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2240 # For subprocess calls, encode with locale encoding
2241 # Refer to http://stackoverflow.com/a/9951851/35070
2242 encoding
= preferredencoding()
2244 encoding
= sys
.getfilesystemencoding()
2245 if encoding
is None:
2250 def encodeFilename(s
, for_subprocess
=False):
2252 @param s The name of the file
2255 assert type(s
) == compat_str
2257 # Python 3 has a Unicode API
2258 if sys
.version_info
>= (3, 0):
2261 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2262 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2263 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2264 if not for_subprocess
and sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2267 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2268 if sys
.platform
.startswith('java'):
2271 return s
.encode(get_subprocess_encoding(), 'ignore')
2274 def decodeFilename(b
, for_subprocess
=False):
2276 if sys
.version_info
>= (3, 0):
2279 if not isinstance(b
, bytes):
2282 return b
.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a command-line argument for passing to a subprocess."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
def decodeArgument(b):
    """Decode a subprocess command-line argument; companion of encodeArgument()."""
    return decodeFilename(b, True)
2298 def decodeOption(optval
):
2301 if isinstance(optval
, bytes):
2302 optval
= optval
.decode(preferredencoding())
2304 assert isinstance(optval
, compat_str
)
2308 def formatSeconds(secs
, delim
=':'):
2310 return '%d%s%02d%s%02d' % (secs
// 3600, delim
, (secs
% 3600) // 60, delim
, secs
% 60)
2312 return '%d%s%02d' % (secs
// 60, delim
, secs
% 60)
2317 def make_HTTPS_handler(params
, **kwargs
):
2318 opts_no_check_certificate
= params
.get('nocheckcertificate', False)
2319 if hasattr(ssl
, 'create_default_context'): # Python >= 3.4 or 2.7.9
2320 context
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
)
2321 if opts_no_check_certificate
:
2322 context
.check_hostname
= False
2323 context
.verify_mode
= ssl
.CERT_NONE
2325 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2328 # (create_default_context present but HTTPSHandler has no context=)
2331 if sys
.version_info
< (3, 2):
2332 return YoutubeDLHTTPSHandler(params
, **kwargs
)
2333 else: # Python < 3.4
2334 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
)
2335 context
.verify_mode
= (ssl
.CERT_NONE
2336 if opts_no_check_certificate
2337 else ssl
.CERT_REQUIRED
)
2338 context
.set_default_verify_paths()
2339 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2342 def bug_reports_message():
2343 if ytdl_is_updateable():
2344 update_cmd
= 'type yt-dlp -U to update'
2346 update_cmd
= 'see https://github.com/yt-dlp/yt-dlp on how to update'
2347 msg
= '; please report this issue on https://github.com/yt-dlp/yt-dlp .'
2348 msg
+= ' Make sure you are using the latest version; %s.' % update_cmd
2349 msg
+= ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2358 class ExtractorError(YoutubeDLError
):
2359 """Error during info extraction."""
2361 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None):
2362 """ tb, if given, is the original traceback (so that it can be printed out).
2363 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
2366 if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
):
2368 if video_id
is not None:
2369 msg
= video_id
+ ': ' + msg
2371 msg
+= ' (caused by %r)' % cause
2373 msg
+= bug_reports_message()
2374 super(ExtractorError
, self
).__init
__(msg
)
2377 self
.exc_info
= sys
.exc_info() # preserve original exception
2379 self
.video_id
= video_id
2381 def format_traceback(self
):
2382 if self
.traceback
is None:
2384 return ''.join(traceback
.format_tb(self
.traceback
))
class UnsupportedError(ExtractorError):
    """Raised when no extractor can handle the given URL (expected, not a bug)."""

    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2399 class GeoRestrictedError(ExtractorError
):
2400 """Geographic restriction Error exception.
2402 This exception may be thrown when a video is not available from your
2403 geographic location due to geographic restrictions imposed by a website.
2406 def __init__(self
, msg
, countries
=None):
2407 super(GeoRestrictedError
, self
).__init
__(msg
, expected
=True)
2409 self
.countries
= countries
class DownloadError(YoutubeDLError):
    """Download Error exception.

    Thrown by FileDownloader objects that are not configured to continue
    on errors; carries the relevant error message.
    """

    def __init__(self, msg, exc_info=None):
        """exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info())."""
        super(DownloadError, self).__init__(msg)
        # Keep the originating exception info so callers can re-raise or log it.
        self.exc_info = exc_info
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    Thrown by YoutubeDL when a requested entry is not found in the
    playlist info_dict.
    """
class SameFileError(YoutubeDLError):
    """Same File exception.

    Thrown by FileDownloader objects when multiple files would have to be
    downloaded to the same file on disk.
    """
2444 class PostProcessingError(YoutubeDLError
):
2445 """Post Processing exception.
2447 This exception may be raised by PostProcessor's .run() method to
2448 indicate an error in the postprocessing task.
2451 def __init__(self
, msg
):
2452 super(PostProcessingError
, self
).__init
__(msg
)
class ExistingVideoReached(YoutubeDLError):
    # Fixed copy-pasted docstring: this exception signals --break-on-existing,
    # not the --max-downloads limit (that is MaxDownloadsReached).
    """--break-on-existing triggered: an already-downloaded video was reached."""
class RejectedVideoReached(YoutubeDLError):
    # Fixed copy-pasted docstring: this exception signals --break-on-reject,
    # not the --max-downloads limit (that is MaxDownloadsReached).
    """--break-on-reject triggered: a video matching the reject filters was reached."""
class MaxDownloadsReached(YoutubeDLError):
    """Raised once the --max-downloads limit has been reached."""
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    Raised by FileDownloader objects when a downloaded file is smaller
    than the size the server announced first, indicating the connection
    was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Keep both byte counts so callers can inspect the shortfall.
        self.downloaded = downloaded
        self.expected = expected
2497 class XAttrMetadataError(YoutubeDLError
):
2498 def __init__(self
, code
=None, msg
='Unknown error'):
2499 super(XAttrMetadataError
, self
).__init
__(msg
)
2503 # Parsing code and msg
2504 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
2505 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
2506 self
.reason
= 'NO_SPACE'
2507 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
2508 self
.reason
= 'VALUE_TOO_LONG'
2510 self
.reason
= 'NOT_SUPPORTED'
class XAttrUnavailableError(YoutubeDLError):
    """Raised when extended file attributes cannot be used on this system."""
    pass
2517 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
2518 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2519 # expected HTTP responses to meet HTTP/1.0 or later (see also
2520 # https://github.com/ytdl-org/youtube-dl/issues/6727)
2521 if sys
.version_info
< (3, 0):
2522 kwargs
['strict'] = True
2523 hc
= http_class(*args
, **compat_kwargs(kwargs
))
2524 source_address
= ydl_handler
._params
.get('source_address')
2526 if source_address
is not None:
2527 # This is to workaround _create_connection() from socket where it will try all
2528 # address data from getaddrinfo() including IPv6. This filters the result from
2529 # getaddrinfo() based on the source_address value.
2530 # This is based on the cpython socket.create_connection() function.
2531 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2532 def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
2533 host
, port
= address
2535 addrs
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
)
2536 af
= socket
.AF_INET
if '.' in source_address
[0] else socket
.AF_INET6
2537 ip_addrs
= [addr
for addr
in addrs
if addr
[0] == af
]
2538 if addrs
and not ip_addrs
:
2539 ip_version
= 'v4' if af
== socket
.AF_INET
else 'v6'
2541 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2542 % (ip_version
, source_address
[0]))
2543 for res
in ip_addrs
:
2544 af
, socktype
, proto
, canonname
, sa
= res
2547 sock
= socket
.socket(af
, socktype
, proto
)
2548 if timeout
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
:
2549 sock
.settimeout(timeout
)
2550 sock
.bind(source_address
)
2552 err
= None # Explicitly break reference cycle
2554 except socket
.error
as _
:
2556 if sock
is not None:
2561 raise socket
.error('getaddrinfo returns an empty list')
2562 if hasattr(hc
, '_create_connection'):
2563 hc
._create
_connection
= _create_connection
2564 sa
= (source_address
, 0)
2565 if hasattr(hc
, 'source_address'): # Python 2.7+
2566 hc
.source_address
= sa
2568 def _hc_connect(self
, *args
, **kwargs
):
2569 sock
= _create_connection(
2570 (self
.host
, self
.port
), self
.timeout
, sa
)
2572 self
.sock
= ssl
.wrap_socket(
2573 sock
, self
.key_file
, self
.cert_file
,
2574 ssl_version
=ssl
.PROTOCOL_TLSv1
)
2577 hc
.connect
= functools
.partial(_hc_connect
, hc
)
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker from `headers`.

    When the marker is present, return a new dict with both the marker and
    any Accept-Encoding header removed; otherwise return `headers` itself
    unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    # Rebuild without Accept-Encoding (case-insensitive), then drop the marker.
    filtered = dict(
        (key, value) for key, value in headers.items()
        if key.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2592 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
2593 """Handler for HTTP requests and responses.
2595 This class, when installed with an OpenerDirector, automatically adds
2596 the standard headers to every HTTP request and handles gzipped and
2597 deflated responses from web servers. If compression is to be avoided in
2598 a particular request, the original request in the program code only has
2599 to include the HTTP header "Youtubedl-no-compression", which will be
2600 removed before making the real request.
2602 Part of this code was copied from:
2604 http://techknack.net/python-urllib2-handlers/
2606 Andrew Rowls, the author of that code, agreed to release it to the
2610 def __init__(self
, params
, *args
, **kwargs
):
2611 compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
)
2612 self
._params
= params
2614 def http_open(self
, req
):
2615 conn_class
= compat_http_client
.HTTPConnection
2617 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2619 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2620 del req
.headers
['Ytdl-socks-proxy']
2622 return self
.do_open(functools
.partial(
2623 _create_http_connection
, self
, conn_class
, False),
2631 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
2633 return zlib
.decompress(data
)
2635 def http_request(self
, req
):
2636 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2637 # always respected by websites, some tend to give out URLs with non percent-encoded
2638 # non-ASCII characters (see telemb.py, ard.py [#3412])
2639 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2640 # To work around aforementioned issue we will replace request's original URL with
2641 # percent-encoded one
2642 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2643 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2644 url
= req
.get_full_url()
2645 url_escaped
= escape_url(url
)
2647 # Substitute URL if any change after escaping
2648 if url
!= url_escaped
:
2649 req
= update_Request(req
, url
=url_escaped
)
2651 for h
, v
in std_headers
.items():
2652 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2653 # The dict keys are capitalized because of this bug by urllib
2654 if h
.capitalize() not in req
.headers
:
2655 req
.add_header(h
, v
)
2657 req
.headers
= handle_youtubedl_headers(req
.headers
)
2659 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
2660 # Python 2.6 is brain-dead when it comes to fragments
2661 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
2662 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
2666 def http_response(self
, req
, resp
):
2669 if resp
.headers
.get('Content-encoding', '') == 'gzip':
2670 content
= resp
.read()
2671 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
2673 uncompressed
= io
.BytesIO(gz
.read())
2674 except IOError as original_ioerror
:
2675 # There may be junk add the end of the file
2676 # See http://stackoverflow.com/q/4928560/35070 for details
2677 for i
in range(1, 1024):
2679 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
2680 uncompressed
= io
.BytesIO(gz
.read())
2685 raise original_ioerror
2686 resp
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2687 resp
.msg
= old_resp
.msg
2688 del resp
.headers
['Content-encoding']
2690 if resp
.headers
.get('Content-encoding', '') == 'deflate':
2691 gz
= io
.BytesIO(self
.deflate(resp
.read()))
2692 resp
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2693 resp
.msg
= old_resp
.msg
2694 del resp
.headers
['Content-encoding']
2695 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2696 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2697 if 300 <= resp
.code
< 400:
2698 location
= resp
.headers
.get('Location')
2700 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2701 if sys
.version_info
>= (3, 0):
2702 location
= location
.encode('iso-8859-1').decode('utf-8')
2704 location
= location
.decode('utf-8')
2705 location_escaped
= escape_url(location
)
2706 if location
!= location_escaped
:
2707 del resp
.headers
['Location']
2708 if sys
.version_info
< (3, 0):
2709 location_escaped
= location_escaped
.encode('utf-8')
2710 resp
.headers
['Location'] = location_escaped
2713 https_request
= http_request
2714 https_response
= http_response
2717 def make_socks_conn_class(base_class
, socks_proxy
):
2718 assert issubclass(base_class
, (
2719 compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
))
2721 url_components
= compat_urlparse
.urlparse(socks_proxy
)
2722 if url_components
.scheme
.lower() == 'socks5':
2723 socks_type
= ProxyType
.SOCKS5
2724 elif url_components
.scheme
.lower() in ('socks', 'socks4'):
2725 socks_type
= ProxyType
.SOCKS4
2726 elif url_components
.scheme
.lower() == 'socks4a':
2727 socks_type
= ProxyType
.SOCKS4A
2729 def unquote_if_non_empty(s
):
2732 return compat_urllib_parse_unquote_plus(s
)
2736 url_components
.hostname
, url_components
.port
or 1080,
2738 unquote_if_non_empty(url_components
.username
),
2739 unquote_if_non_empty(url_components
.password
),
2742 class SocksConnection(base_class
):
2744 self
.sock
= sockssocket()
2745 self
.sock
.setproxy(*proxy_args
)
2746 if type(self
.timeout
) in (int, float):
2747 self
.sock
.settimeout(self
.timeout
)
2748 self
.sock
.connect((self
.host
, self
.port
))
2750 if isinstance(self
, compat_http_client
.HTTPSConnection
):
2751 if hasattr(self
, '_context'): # Python > 2.6
2752 self
.sock
= self
._context
.wrap_socket(
2753 self
.sock
, server_hostname
=self
.host
)
2755 self
.sock
= ssl
.wrap_socket(self
.sock
)
2757 return SocksConnection
2760 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
):
2761 def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
):
2762 compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
)
2763 self
._https
_conn
_class
= https_conn_class
or compat_http_client
.HTTPSConnection
2764 self
._params
= params
2766 def https_open(self
, req
):
2768 conn_class
= self
._https
_conn
_class
2770 if hasattr(self
, '_context'): # python > 2.6
2771 kwargs
['context'] = self
._context
2772 if hasattr(self
, '_check_hostname'): # python 3.x
2773 kwargs
['check_hostname'] = self
._check
_hostname
2775 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2777 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2778 del req
.headers
['Ytdl-socks-proxy']
2780 return self
.do_open(functools
.partial(
2781 _create_http_connection
, self
, conn_class
, True),
2785 class YoutubeDLCookieJar(compat_cookiejar
.MozillaCookieJar
):
2787 See [1] for cookie file format.
2789 1. https://curl.haxx.se/docs/http-cookies.html
2791 _HTTPONLY_PREFIX
= '#HttpOnly_'
2793 _HEADER
= '''# Netscape HTTP Cookie File
2794 # This file is generated by yt-dlp. Do not edit.
2797 _CookieFileEntry
= collections
.namedtuple(
2799 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2801 def save(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2803 Save cookies to a file.
2805 Most of the code is taken from CPython 3.8 and slightly adapted
2806 to support cookie files with UTF-8 in both python 2 and 3.
2808 if filename
is None:
2809 if self
.filename
is not None:
2810 filename
= self
.filename
2812 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2814 # Store session cookies with `expires` set to 0 instead of an empty
2817 if cookie
.expires
is None:
2820 with io
.open(filename
, 'w', encoding
='utf-8') as f
:
2821 f
.write(self
._HEADER
)
2824 if not ignore_discard
and cookie
.discard
:
2826 if not ignore_expires
and cookie
.is_expired(now
):
2832 if cookie
.domain
.startswith('.'):
2833 initial_dot
= 'TRUE'
2835 initial_dot
= 'FALSE'
2836 if cookie
.expires
is not None:
2837 expires
= compat_str(cookie
.expires
)
2840 if cookie
.value
is None:
2841 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2842 # with no name, whereas http.cookiejar regards it as a
2843 # cookie with no value.
2848 value
= cookie
.value
2850 '\t'.join([cookie
.domain
, initial_dot
, cookie
.path
,
2851 secure
, expires
, name
, value
]) + '\n')
2853 def load(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2854 """Load cookies from a file."""
2855 if filename
is None:
2856 if self
.filename
is not None:
2857 filename
= self
.filename
2859 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2861 def prepare_line(line
):
2862 if line
.startswith(self
._HTTPONLY
_PREFIX
):
2863 line
= line
[len(self
._HTTPONLY
_PREFIX
):]
2864 # comments and empty lines are fine
2865 if line
.startswith('#') or not line
.strip():
2867 cookie_list
= line
.split('\t')
2868 if len(cookie_list
) != self
._ENTRY
_LEN
:
2869 raise compat_cookiejar
.LoadError('invalid length %d' % len(cookie_list
))
2870 cookie
= self
._CookieFileEntry
(*cookie_list
)
2871 if cookie
.expires_at
and not cookie
.expires_at
.isdigit():
2872 raise compat_cookiejar
.LoadError('invalid expires at %s' % cookie
.expires_at
)
2876 with io
.open(filename
, encoding
='utf-8') as f
:
2879 cf
.write(prepare_line(line
))
2880 except compat_cookiejar
.LoadError
as e
:
2882 'WARNING: skipping cookie file entry due to %s: %r\n'
2883 % (e
, line
), sys
.stderr
)
2886 self
._really
_load
(cf
, filename
, ignore_discard
, ignore_expires
)
2887 # Session cookies are denoted by either `expires` field set to
2888 # an empty string or 0. MozillaCookieJar only recognizes the former
2889 # (see [1]). So we need force the latter to be recognized as session
2890 # cookies on our own.
2891 # Session cookies may be important for cookies-based authentication,
2892 # e.g. usually, when user does not check 'Remember me' check box while
2893 # logging in on a site, some important cookies are stored as session
2894 # cookies so that not recognizing them will result in failed login.
2895 # 1. https://bugs.python.org/issue17164
2897 # Treat `expires=0` cookies as session cookies
2898 if cookie
.expires
== 0:
2899 cookie
.expires
= None
2900 cookie
.discard
= True
2903 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
):
2904 def __init__(self
, cookiejar
=None):
2905 compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
)
2907 def http_response(self
, request
, response
):
2908 # Python 2 will choke on next HTTP request in row if there are non-ASCII
2909 # characters in Set-Cookie HTTP header of last response (see
2910 # https://github.com/ytdl-org/youtube-dl/issues/6769).
2911 # In order to at least prevent crashing we will percent encode Set-Cookie
2912 # header before HTTPCookieProcessor starts processing it.
2913 # if sys.version_info < (3, 0) and response.headers:
2914 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2915 # set_cookie = response.headers.get(set_cookie_header)
2917 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2918 # if set_cookie != set_cookie_escaped:
2919 # del response.headers[set_cookie_header]
2920 # response.headers[set_cookie_header] = set_cookie_escaped
2921 return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
)
2923 https_request
= compat_urllib_request
.HTTPCookieProcessor
.http_request
2924 https_response
= http_response
2927 class YoutubeDLRedirectHandler(compat_urllib_request
.HTTPRedirectHandler
):
2928 if sys
.version_info
[0] < 3:
2929 def redirect_request(self
, req
, fp
, code
, msg
, headers
, newurl
):
2930 # On python 2 urlh.geturl() may sometimes return redirect URL
2931 # as byte string instead of unicode. This workaround allows
2932 # to force it always return unicode.
2933 return compat_urllib_request
.HTTPRedirectHandler
.redirect_request(self
, req
, fp
, code
, msg
, headers
, compat_str(newurl
))
def extract_timezone(date_str):
    """Split a textual timestamp into (utc_offset, remainder).

    Recognizes a trailing 'Z' or '+HH:MM'/'-HHMM' style designator and
    returns it as a datetime.timedelta (zero for 'Z' or when absent),
    together with the date string with that designator removed.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        # No designator found - leave the string untouched.
        return datetime.timedelta(), date_str
    # Strip the matched designator off the end of the string.
    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # A bare 'Z' means UTC, i.e. a zero offset.
        return datetime.timedelta(), date_str
    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # strptime cannot digest fractional seconds - drop them up front.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        parsed = datetime.datetime.strptime(date_str, fmt) - timezone
        return calendar.timegm(parsed.timetuple())
    except ValueError:
        # Unparseable date: fall through and return None implicitly.
        pass
def date_formats(day_first=True):
    """Pick the strptime format list matching day-first or month-first order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2977 def unified_strdate(date_str
, day_first
=True):
2978 """Return a string with the date in the format YYYYMMDD"""
2980 if date_str
is None:
2984 date_str
= date_str
.replace(',', ' ')
2985 # Remove AM/PM + timezone
2986 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
2987 _
, date_str
= extract_timezone(date_str
)
2989 for expression
in date_formats(day_first
):
2991 upload_date
= datetime
.datetime
.strptime(date_str
, expression
).strftime('%Y%m%d')
2994 if upload_date
is None:
2995 timetuple
= email
.utils
.parsedate_tz(date_str
)
2998 upload_date
= datetime
.datetime(*timetuple
[:6]).strftime('%Y%m%d')
3001 if upload_date
is not None:
3002 return compat_str(upload_date
)
3005 def unified_timestamp(date_str
, day_first
=True):
3006 if date_str
is None:
3009 date_str
= re
.sub(r
'[,|]', '', date_str
)
3011 pm_delta
= 12 if re
.search(r
'(?i)PM', date_str
) else 0
3012 timezone
, date_str
= extract_timezone(date_str
)
3014 # Remove AM/PM + timezone
3015 date_str
= re
.sub(r
'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str
)
3017 # Remove unrecognized timezones from ISO 8601 alike timestamps
3018 m
= re
.search(r
'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str
)
3020 date_str
= date_str
[:-len(m
.group('tz'))]
3022 # Python only supports microseconds, so remove nanoseconds
3023 m
= re
.search(r
'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str
)
3025 date_str
= m
.group(1)
3027 for expression
in date_formats(day_first
):
3029 dt
= datetime
.datetime
.strptime(date_str
, expression
) - timezone
+ datetime
.timedelta(hours
=pm_delta
)
3030 return calendar
.timegm(dt
.timetuple())
3033 timetuple
= email
.utils
.parsedate_tz(date_str
)
3035 return calendar
.timegm(timetuple
) + pm_delta
* 3600
def determine_ext(url, default_ext='unknown_video'):
    """Guess a media file extension from a URL, or return *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle filename: <base>.<sub_lang>.<sub_format>."""
    suffix = sub_lang + '.' + sub_format
    return replace_extension(filename, suffix, expected_real_ext)
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    rel = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if rel is not None:
        amount = int(rel.group('time'))
        if rel.group('sign') == '-':
            amount = -amount
        unit = rel.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit, amount = 'day', amount * 30
        elif unit == 'year':
            unit, amount = 'day', amount * 365
        # timedelta only knows plural keyword arguments (days=, weeks=, ...).
        return today + datetime.timedelta(**{unit + 's': amount})
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    # Anything that is not a plain 8-digit date is passed through untouched.
    return '-'.join(m.groups()) if m is not None else date_str
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing endpoints default to the widest representable range.
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3125 def platform_name():
3126 """ Returns the platform name as a compat_str """
3127 res
= platform
.platform()
3128 if isinstance(res
, bytes):
3129 res
= res
.decode(preferredencoding())
3131 assert isinstance(res
, compat_str
)
3135 def _windows_write_string(s
, out
):
3136 """ Returns True if the string was written using special methods,
3137 False if it has yet to be written out."""
3138 # Adapted from http://stackoverflow.com/a/3259271/35070
3141 import ctypes
.wintypes
3149 fileno
= out
.fileno()
3150 except AttributeError:
3151 # If the output stream doesn't have a fileno, it's virtual
3153 except io
.UnsupportedOperation
:
3154 # Some strange Windows pseudo files?
3156 if fileno
not in WIN_OUTPUT_IDS
:
3159 GetStdHandle
= compat_ctypes_WINFUNCTYPE(
3160 ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)(
3161 ('GetStdHandle', ctypes
.windll
.kernel32
))
3162 h
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
])
3164 WriteConsoleW
= compat_ctypes_WINFUNCTYPE(
3165 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
,
3166 ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
),
3167 ctypes
.wintypes
.LPVOID
)(('WriteConsoleW', ctypes
.windll
.kernel32
))
3168 written
= ctypes
.wintypes
.DWORD(0)
3170 GetFileType
= compat_ctypes_WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(('GetFileType', ctypes
.windll
.kernel32
))
3171 FILE_TYPE_CHAR
= 0x0002
3172 FILE_TYPE_REMOTE
= 0x8000
3173 GetConsoleMode
= compat_ctypes_WINFUNCTYPE(
3174 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
,
3175 ctypes
.POINTER(ctypes
.wintypes
.DWORD
))(
3176 ('GetConsoleMode', ctypes
.windll
.kernel32
))
3177 INVALID_HANDLE_VALUE
= ctypes
.wintypes
.DWORD(-1).value
3179 def not_a_console(handle
):
3180 if handle
== INVALID_HANDLE_VALUE
or handle
is None:
3182 return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
3183 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0)
3185 if not_a_console(h
):
3188 def next_nonbmp_pos(s
):
3190 return next(i
for i
, c
in enumerate(s
) if ord(c
) > 0xffff)
3191 except StopIteration:
3195 count
= min(next_nonbmp_pos(s
), 1024)
3197 ret
= WriteConsoleW(
3198 h
, s
, count
if count
else 2, ctypes
.byref(written
), None)
3200 raise OSError('Failed to write string')
3201 if not count
: # We just wrote a non-BMP character
3202 assert written
.value
== 2
3205 assert written
.value
> 0
3206 s
= s
[written
.value
:]
3210 def write_string(s
, out
=None, encoding
=None):
3213 assert type(s
) == compat_str
3215 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
3216 if _windows_write_string(s
, out
):
3219 if ('b' in getattr(out
, 'mode', '')
3220 or sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
3221 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
3223 elif hasattr(out
, 'buffer'):
3224 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
3225 byt
= s
.encode(enc
, 'ignore')
3226 out
.buffer.write(byt
)
3232 def bytes_to_intlist(bs
):
3235 if isinstance(bs
[0], int): # Python 3
3238 return [ord(c
) for c
in bs
]
3241 def intlist_to_bytes(xs
):
3244 return compat_struct_pack('%dB' % len(xs
), *xs
)
3247 # Cross-platform file locking
3248 if sys
.platform
== 'win32':
3249 import ctypes
.wintypes
3252 class OVERLAPPED(ctypes
.Structure
):
3254 ('Internal', ctypes
.wintypes
.LPVOID
),
3255 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
3256 ('Offset', ctypes
.wintypes
.DWORD
),
3257 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
3258 ('hEvent', ctypes
.wintypes
.HANDLE
),
3261 kernel32
= ctypes
.windll
.kernel32
3262 LockFileEx
= kernel32
.LockFileEx
3263 LockFileEx
.argtypes
= [
3264 ctypes
.wintypes
.HANDLE
, # hFile
3265 ctypes
.wintypes
.DWORD
, # dwFlags
3266 ctypes
.wintypes
.DWORD
, # dwReserved
3267 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3268 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3269 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3271 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
3272 UnlockFileEx
= kernel32
.UnlockFileEx
3273 UnlockFileEx
.argtypes
= [
3274 ctypes
.wintypes
.HANDLE
, # hFile
3275 ctypes
.wintypes
.DWORD
, # dwReserved
3276 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3277 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3278 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3280 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
3281 whole_low
= 0xffffffff
3282 whole_high
= 0x7fffffff
3284 def _lock_file(f
, exclusive
):
3285 overlapped
= OVERLAPPED()
3286 overlapped
.Offset
= 0
3287 overlapped
.OffsetHigh
= 0
3288 overlapped
.hEvent
= 0
3289 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
3290 handle
= msvcrt
.get_osfhandle(f
.fileno())
3291 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
3292 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3293 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
3295 def _unlock_file(f
):
3296 assert f
._lock
_file
_overlapped
_p
3297 handle
= msvcrt
.get_osfhandle(f
.fileno())
3298 if not UnlockFileEx(handle
, 0,
3299 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3300 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
3303 # Some platforms, such as Jython, is missing fcntl
3307 def _lock_file(f
, exclusive
):
3308 fcntl
.flock(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
3310 def _unlock_file(f
):
3311 fcntl
.flock(f
, fcntl
.LOCK_UN
)
3313 UNSUPPORTED_MSG
= 'file locking is not supported on this platform'
3315 def _lock_file(f
, exclusive
):
3316 raise IOError(UNSUPPORTED_MSG
)
3318 def _unlock_file(f
):
3319 raise IOError(UNSUPPORTED_MSG
)
3322 class locked_file(object):
3323 def __init__(self
, filename
, mode
, encoding
=None):
3324 assert mode
in ['r', 'a', 'w']
3325 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
3328 def __enter__(self
):
3329 exclusive
= self
.mode
!= 'r'
3331 _lock_file(self
.f
, exclusive
)
3337 def __exit__(self
, etype
, value
, traceback
):
3339 _unlock_file(self
.f
)
3346 def write(self
, *args
):
3347 return self
.f
.write(*args
)
3349 def read(self
, *args
):
3350 return self
.f
.read(*args
)
def get_filesystem_encoding():
    """Return the filesystem encoding, falling back to 'utf-8' when unknown."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3358 def shell_quote(args
):
3360 encoding
= get_filesystem_encoding()
3362 if isinstance(a
, bytes):
3363 # We may get a filename encoded with 'encodeFilename'
3364 a
= a
.decode(encoding
)
3365 quoted_args
.append(compat_shlex_quote(a
))
3366 return ' '.join(quoted_args
)
3369 def smuggle_url(url
, data
):
3370 """ Pass additional data in a URL for internal use. """
3372 url
, idata
= unsmuggle_url(url
, {})
3374 sdata
= compat_urllib_parse_urlencode(
3375 {'__youtubedl_smuggle': json.dumps(data)}
)
3376 return url
+ '#' + sdata
3379 def unsmuggle_url(smug_url
, default
=None):
3380 if '#__youtubedl_smuggle' not in smug_url
:
3381 return smug_url
, default
3382 url
, _
, sdata
= smug_url
.rpartition('#')
3383 jsond
= compat_parse_qs(sdata
)['__youtubedl_smuggle'][0]
3384 data
= json
.loads(jsond
)
def format_bytes(bytes):
    """Render a byte count as a human-readable binary-unit string ('N/A' if None)."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # math.log(0) is undefined - zero bytes stays in the 'B' bucket.
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    return '%.2f%s' % (float(bytes) / float(1024 ** exponent), units[exponent])
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' using *unit_table* (unit -> multiplier); None on no match."""
    alternatives = '|'.join(re.escape(unit) for unit in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if m is None:
        return None
    # Accept a comma as the decimal separator as well.
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3413 def parse_filesize(s
):
3417 # The lower-case forms are of course incorrect and unofficial,
3418 # but we support those too
3435 'megabytes': 1000 ** 2,
3436 'mebibytes': 1024 ** 2,
3442 'gigabytes': 1000 ** 3,
3443 'gibibytes': 1024 ** 3,
3449 'terabytes': 1000 ** 4,
3450 'tebibytes': 1024 ** 4,
3456 'petabytes': 1000 ** 5,
3457 'pebibytes': 1024 ** 5,
3463 'exabytes': 1000 ** 6,
3464 'exbibytes': 1024 ** 6,
3470 'zettabytes': 1000 ** 7,
3471 'zebibytes': 1024 ** 7,
3477 'yottabytes': 1000 ** 8,
3478 'yobibytes': 1024 ** 8,
3481 return lookup_unit_table(_UNIT_TABLE
, s
)
3490 if re
.match(r
'^[\d,.]+$', s
):
3491 return str_to_int(s
)
3502 return lookup_unit_table(_UNIT_TABLE
, s
)
def parse_resolution(s):
    """Parse 'WxH', '<N>p'/'<N>i' or '4k'/'8k' notation into a width/height dict."""
    if s is None:
        return {}

    m = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if m:
        return {'width': int(m.group('w')), 'height': int(m.group('h'))}

    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        # 4k -> 2160, 8k -> 4320 (540 lines per 'k')
        return {'height': int(m.group(1)) * 540}

    return {}
3527 def parse_bitrate(s
):
3528 if not isinstance(s
, compat_str
):
3530 mobj
= re
.search(r
'\b(\d+)\s*kbps', s
)
3532 return int(mobj
.group(1))
3535 def month_by_name(name
, lang
='en'):
3536 """ Return the number of a month by (locale-independently) English name """
3538 month_names
= MONTH_NAMES
.get(lang
, MONTH_NAMES
['en'])
3541 return month_names
.index(name
) + 1
3546 def month_by_abbreviation(abbrev
):
3547 """ Return the number of a month by (locale-independently) English
3551 return [s
[:3] for s
in ENGLISH_MONTH_NAMES
].index(abbrev
) + 1
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' that does not start an XML entity by '&amp;'."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
3564 def setproctitle(title
):
3565 assert isinstance(title
, compat_str
)
3567 # ctypes in Jython is not complete
3568 # http://bugs.jython.org/issue2148
3569 if sys
.platform
.startswith('java'):
3573 libc
= ctypes
.cdll
.LoadLibrary('libc.so.6')
3577 # LoadLibrary in Windows Python 2.7.13 only expects
3578 # a bytestring, but since unicode_literals turns
3579 # every string into a unicode string, it fails.
3581 title_bytes
= title
.encode('utf-8')
3582 buf
= ctypes
.create_string_buffer(len(title_bytes
))
3583 buf
.value
= title_bytes
3585 libc
.prctl(15, buf
, 0, 0, 0)
3586 except AttributeError:
3587 return # Strange libc, just skip this
def remove_start(s, start):
    """Drop the prefix *start* from *s* when present (None-safe)."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
def remove_end(s, end):
    """Drop the suffix *end* from *s* when present (None-safe).

    Fix: an empty *end* is now a no-op. Previously s.endswith('') was
    always true and s[:-len('')] evaluated to s[:0], wrongly collapsing
    the whole string to ''.
    """
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Strip one layer of matching single or double quotes from *s*."""
    if s is None or len(s) < 2:
        return s
    # Only strip when the first and last characters are the same quote kind.
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
def get_domain(url):
    """Extract the bare domain ('example.com') from *url*, or None."""
    m = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?',
        url)
    if m is None:
        return None
    return m.group('domain')
def url_basename(url):
    """Return the last component of the URL's path ('' when the path is empty)."""
    parsed_path = compat_urlparse.urlparse(url).path
    return parsed_path.strip('/').split('/')[-1]
3618 return re
.match(r
'https?://[^?#&]+/', url
).group()
def urljoin(base, path):
    """Join *base* and *path*, returning None unless *path* is usable and
    *base* is an http(s) or protocol-relative URL."""
    def as_text(s):
        return s.decode('utf-8') if isinstance(s, bytes) else s

    path = as_text(path)
    if not isinstance(path, compat_str) or not path:
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        # Already absolute (possibly protocol-relative) - nothing to join.
        return path
    base = as_text(base)
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3636 class HEADRequest(compat_urllib_request
.Request
):
3637 def get_method(self
):
3641 class PUTRequest(compat_urllib_request
.Request
):
3642 def get_method(self
):
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """int(v) * invscale // scale, or *default* when conversion fails.

    When *get_attr* is given, the named attribute of *v* is converted instead.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def str_or_none(v, default=None):
    """Stringify *v* with compat_str, mapping None to *default*."""
    if v is None:
        return default
    return compat_str(v)
3664 def str_to_int(int_str
):
3665 """ A more relaxed version of int_or_none """
3666 if isinstance(int_str
, compat_integer_types
):
3668 elif isinstance(int_str
, compat_str
):
3669 int_str
= re
.sub(r
'[,\.\+]', '', int_str
)
3670 return int_or_none(int_str
)
def float_or_none(v, scale=1, invscale=1, default=None):
    """float(v) * invscale / scale, or *default* when *v* is None or invalid."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        result = default
    return result
def bool_or_none(v, default=None):
    """Pass through real booleans; anything else becomes *default*."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return v.strip() for string input; anything else becomes *default*."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3690 def url_or_none(url
):
3691 if not url
or not isinstance(url
, compat_str
):
3694 return url
if re
.match(r
'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url
) else None
3697 def strftime_or_none(timestamp
, date_format
, default
=None):
3698 datetime_object
= None
3700 if isinstance(timestamp
, compat_numeric_types
): # unix timestamp
3701 datetime_object
= datetime
.datetime
.utcfromtimestamp(timestamp
)
3702 elif isinstance(timestamp
, compat_str
): # assume YYYYMMDD
3703 datetime_object
= datetime
.datetime
.strptime(timestamp
, '%Y%m%d')
3704 return datetime_object
.strftime(date_format
)
3705 except (ValueError, TypeError, AttributeError):
3709 def parse_duration(s
):
3710 if not isinstance(s
, compat_basestring
):
3715 days
, hours
, mins
, secs
, ms
= [None] * 5
3716 m
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
)
3718 days
, hours
, mins
, secs
, ms
= m
.groups()
3723 [0-9]+\s*y(?:ears?)?\s*
3726 [0-9]+\s*m(?:onths?)?\s*
3729 [0-9]+\s*w(?:eeks?)?\s*
3732 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3736 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3739 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3742 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3745 days
, hours
, mins
, secs
, ms
= m
.groups()
3747 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
3749 hours
, mins
= m
.groups()
3755 duration
+= float(secs
)
3757 duration
+= float(mins
) * 60
3759 duration
+= float(hours
) * 60 * 60
3761 duration
+= float(days
) * 24 * 60 * 60
3763 duration
+= float(ms
)
3767 def prepend_extension(filename
, ext
, expected_real_ext
=None):
3768 name
, real_ext
= os
.path
.splitext(filename
)
3770 '{0}.{1}{2}'.format(name
, ext
, real_ext
)
3771 if not expected_real_ext
or real_ext
[1:] == expected_real_ext
3772 else '{0}.{1}'.format(filename
, ext
))
3775 def replace_extension(filename
, ext
, expected_real_ext
=None):
3776 name
, real_ext
= os
.path
.splitext(filename
)
3777 return '{0}.{1}'.format(
3778 name
if not expected_real_ext
or real_ext
[1:] == expected_real_ext
else filename
,
3782 def check_executable(exe
, args
=[]):
3783 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3784 args can be a list of arguments for a short output (like -version) """
3786 process_communicate_or_kill(subprocess
.Popen(
3787 [exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
))
3793 def get_exe_version(exe
, args
=['--version'],
3794 version_re
=None, unrecognized
='present'):
3795 """ Returns the version of the specified executable,
3796 or False if the executable is not present """
3798 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3799 # SIGTTOU if yt-dlp is run in the background.
3800 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3801 out
, _
= process_communicate_or_kill(subprocess
.Popen(
3802 [encodeArgument(exe
)] + args
,
3803 stdin
=subprocess
.PIPE
,
3804 stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
))
3807 if isinstance(out
, bytes): # Python 2.x
3808 out
= out
.decode('ascii', 'ignore')
3809 return detect_exe_version(out
, version_re
, unrecognized
)
3812 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
3813 assert isinstance(output
, compat_str
)
3814 if version_re
is None:
3815 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
3816 m
= re
.search(version_re
, output
)
3823 class PagedList(object):
3825 # This is only useful for tests
3826 return len(self
.getslice())
3829 class OnDemandPagedList(PagedList
):
3830 def __init__(self
, pagefunc
, pagesize
, use_cache
=True):
3831 self
._pagefunc
= pagefunc
3832 self
._pagesize
= pagesize
3833 self
._use
_cache
= use_cache
3837 def getslice(self
, start
=0, end
=None):
3839 for pagenum
in itertools
.count(start
// self
._pagesize
):
3840 firstid
= pagenum
* self
._pagesize
3841 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
3842 if start
>= nextfirstid
:
3847 page_results
= self
._cache
.get(pagenum
)
3848 if page_results
is None:
3849 page_results
= list(self
._pagefunc
(pagenum
))
3851 self
._cache
[pagenum
] = page_results
3854 start
% self
._pagesize
3855 if firstid
<= start
< nextfirstid
3859 ((end
- 1) % self
._pagesize
) + 1
3860 if (end
is not None and firstid
<= end
<= nextfirstid
)
3863 if startv
!= 0 or endv
is not None:
3864 page_results
= page_results
[startv
:endv
]
3865 res
.extend(page_results
)
3867 # A little optimization - if current page is not "full", ie. does
3868 # not contain page_size videos then we can assume that this page
3869 # is the last one - there are no more ids on further pages -
3870 # i.e. no need to query again.
3871 if len(page_results
) + startv
< self
._pagesize
:
3874 # If we got the whole page, but the next page is not interesting,
3875 # break out early as well
3876 if end
== nextfirstid
:
3881 class InAdvancePagedList(PagedList
):
3882 def __init__(self
, pagefunc
, pagecount
, pagesize
):
3883 self
._pagefunc
= pagefunc
3884 self
._pagecount
= pagecount
3885 self
._pagesize
= pagesize
3887 def getslice(self
, start
=0, end
=None):
3889 start_page
= start
// self
._pagesize
3891 self
._pagecount
if end
is None else (end
// self
._pagesize
+ 1))
3892 skip_elems
= start
- start_page
* self
._pagesize
3893 only_more
= None if end
is None else end
- start
3894 for pagenum
in range(start_page
, end_page
):
3895 page
= list(self
._pagefunc
(pagenum
))
3897 page
= page
[skip_elems
:]
3899 if only_more
is not None:
3900 if len(page
) < only_more
:
3901 only_more
-= len(page
)
3903 page
= page
[:only_more
]
3910 def uppercase_escape(s
):
3911 unicode_escape
= codecs
.getdecoder('unicode_escape')
3913 r
'\\U[0-9a-fA-F]{8}',
3914 lambda m
: unicode_escape(m
.group(0))[0],
3918 def lowercase_escape(s
):
3919 unicode_escape
= codecs
.getdecoder('unicode_escape')
3921 r
'\\u[0-9a-fA-F]{4}',
3922 lambda m
: unicode_escape(m
.group(0))[0],
3926 def escape_rfc3986(s
):
3927 """Escape non-ASCII characters as suggested by RFC 3986"""
3928 if sys
.version_info
< (3, 0) and isinstance(s
, compat_str
):
3929 s
= s
.encode('utf-8')
3930 return compat_urllib_parse
.quote(s
, b
"%/;:@&=+$,!~*'()?#[]")
3933 def escape_url(url
):
3934 """Escape URL as suggested by RFC 3986"""
3935 url_parsed
= compat_urllib_parse_urlparse(url
)
3936 return url_parsed
._replace
(
3937 netloc
=url_parsed
.netloc
.encode('idna').decode('ascii'),
3938 path
=escape_rfc3986(url_parsed
.path
),
3939 params
=escape_rfc3986(url_parsed
.params
),
3940 query
=escape_rfc3986(url_parsed
.query
),
3941 fragment
=escape_rfc3986(url_parsed
.fragment
)
3945 def read_batch_urls(batch_fd
):
3947 if not isinstance(url
, compat_str
):
3948 url
= url
.decode('utf-8', 'replace')
3949 BOM_UTF8
= ('\xef\xbb\xbf', '\ufeff')
3950 for bom
in BOM_UTF8
:
3951 if url
.startswith(bom
):
3952 url
= url
[len(bom
):]
3954 if not url
or url
.startswith(('#', ';', ']')):
3956 # "#" cannot be stripped out since it is part of the URI
3957 # However, it can be safely stipped out if follwing a whitespace
3958 return re
.split(r
'\s#', url
, 1)[0].rstrip()
3960 with contextlib
.closing(batch_fd
) as fd
:
3961 return [url
for url
in map(fixup
, fd
) if url
]
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3968 def update_url_query(url
, query
):
3971 parsed_url
= compat_urlparse
.urlparse(url
)
3972 qs
= compat_parse_qs(parsed_url
.query
)
3974 return compat_urlparse
.urlunparse(parsed_url
._replace
(
3975 query
=compat_urllib_parse_urlencode(qs
, True)))
def update_Request(req, url=None, data=None, headers=None, query=None):
    """Clone *req*, preserving its HTTP method (HEAD/PUT/other), with an
    updated URL, body, extra headers and/or extra query parameters.

    Fix: *headers* and *query* previously used mutable {} defaults (a
    shared-state hazard); None defaults behave identically for callers.
    """
    req_headers = req.headers.copy()
    req_headers.update(headers or {})
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query or {})
    req_get_method = req.get_method()
    # Pick a Request subclass that reproduces the original method.
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
def _multipart_encode_impl(data, boundary):
    """Build a multipart/form-data body from dict `data` using `boundary`.

    Returns (body_bytes, content_type). Raises ValueError if the boundary
    occurs inside any encoded field, which would corrupt the message.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        out += b'--' + boundary_bytes + b'\r\n'
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = (b'Content-Disposition: form-data; name="' + name
                + b'"\r\n\r\n' + value + b'\r\n')
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += part

    out += b'--' + boundary_bytes + b'--\r\n'
    return out, content_type
def multipart_encode(data, boundary=None):
    """
    Encode a dict to RFC 7578-compliant form-data.

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    """
    caller_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(
                random.randrange(0x0fffffff, 0xffffffff))
        try:
            body, content_type = _multipart_encode_impl(data, boundary)
        except ValueError:
            if caller_boundary:
                # A caller-supplied boundary that clashes with the payload
                # is an error the caller must handle.
                raise
            boundary = None  # retry with a fresh random boundary
        else:
            return body, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key — or the first usable of several keys — in dict `d`.

    A candidate is skipped when it is missing, None, or (when
    skip_false_values is set) falsy; `default` is returned if none match.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for candidate in key_or_keys:
        if candidate not in d:
            continue
        value = d[candidate]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
def try_get(src, getter, expected_type=None):
    """Apply one or more getter callables to `src`, returning the first
    result that neither raises (AttributeError/KeyError/TypeError/
    IndexError) nor fails the `expected_type` check; otherwise None."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fn in getters:
        try:
            value = fn(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
def merge_dicts(*dicts):
    """Merge dicts left to right; earlier dicts win, except that a
    non-empty string value may replace an empty string set earlier.
    None values are never stored."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            should_store = (
                k not in merged
                or (isinstance(v, compat_str) and v
                    and isinstance(merged[k], compat_str)
                    and not merged[k]))
            if should_store:
                merged[k] = v
    return merged
def encode_compat_str(string, encoding=None, errors='strict'):
    """Coerce `string` to compat_str (text), decoding with `encoding`
    (default: the system's preferred encoding) when it isn't text already.

    The previous signature evaluated `preferredencoding()` once at import
    time (call-in-default-argument antipattern); resolving it per call
    avoids import-time work and picks up locale changes.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding or preferredencoding(), errors)
4100 TV_PARENTAL_GUIDELINES
= {
def parse_age_limit(s):
    """Parse an age limit from an int (0..21), a string like '18' or '18+',
    an MPAA rating (US_RATINGS), or a TV parental guideline such as
    'TV-MA' / 'TV_14'. Returns None when nothing matches."""
    if type(s) == int:  # exact type check: bool must not be accepted
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    age_match = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if age_match:
        return int(age_match.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    tv_match = re.match(
        r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if tv_match:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_match.group(1)]
    return None
def strip_jsonp(code):
    """Strip a JSONP wrapper (e.g. 'callback({...});') and return the
    raw JSON payload."""
    jsonp_wrapper = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_wrapper, r'\g<callback_data>', code)
def js_to_json(code, vars={}):
    """Convert a JavaScript object literal into valid JSON text.

    Handles comments, single-quoted strings, unquoted keys, hex/octal
    integer literals and trailing commas.

    vars: dict of identifier -> replacement (JSON) value to substitute.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        v = m.group(0)
        # Keywords pass through unchanged.
        if v in ('true', 'false', 'null'):
            return v
        # Comments, '!' runs and stray commas are dropped.
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ''

        if v[0] in ("'", '"'):
            # Re-escape the string body for JSON.
            v = re.sub(r'(?s)\\.|"', lambda esc: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(esc.group(0), esc.group(0)), v[1:-1])
        else:
            # Bare token: try hex/octal integer, then a vars substitution.
            for regex, base in INTEGER_TABLE:
                int_match = re.match(regex, v)
                if int_match:
                    i = int(int_match.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def rank(qid):
        # Position in the list is the quality value; unknown ids rank -1.
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return rank
4194 'default': '%(title)s [%(id)s].%(ext)s',
4195 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
4201 'description': 'description',
4202 'annotation': 'annotations.xml',
4203 'infojson': 'info.json',
4204 'pl_description': 'description',
4205 'pl_infojson': 'info.json',
4208 # As of [1] format syntax is:
4209 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4210 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4211 FORMAT_RE
= r
'''(?x)
4214 \({0}\) # mapping key
4215 (?:[#0\-+ ]+)? # conversion flags (optional)
4216 (?:\d+)? # minimum field width (optional)
4217 (?:\.\d+)? # precision (optional)
4218 [hlL]? # length modifier (optional)
4219 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
def limit_length(s, length):
    """Truncate `s` to at most `length` characters, ending with '...'
    when truncation occurs. None passes through unchanged."""
    if s is None:
        return None
    suffix = '...'
    if len(s) <= length:
        return s
    return s[:length - len(suffix)] + suffix
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
def is_outdated_version(version, limit, assume_new=True):
    """Compare dotted version strings; True when `version` < `limit`.
    Missing or unparseable input yields `not assume_new`."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    from zipimport import zipimporter
    # Only the zip bundle and frozen (PyInstaller-style) builds self-update.
    loaded_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return loaded_from_zip or hasattr(sys, 'frozen')
def args_to_str(args):
    """Return a shell-quoted one-line representation of a subprocess
    command (for logging only, not for execution)."""
    return ' '.join(map(compat_shlex_quote, args))
def error_to_compat_str(err):
    """Return the text of an exception as a native text string."""
    text = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        text = text.decode(preferredencoding())
    return text
4269 def mimetype2ext(mt
):
4275 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4276 # it's the most popular one
4277 'audio/mpeg': 'mp3',
4278 'audio/x-wav': 'wav',
4283 _
, _
, res
= mt
.rpartition('/')
4284 res
= res
.split(';')[0].strip().lower()
4288 'smptett+xml': 'tt',
4292 'x-mp4-fragmented': 'mp4',
4293 'x-ms-sami': 'sami',
4296 'x-mpegurl': 'm3u8',
4297 'vnd.apple.mpegurl': 'm3u8',
4301 'vnd.ms-sstr+xml': 'ism',
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}.

    Reference: http://tools.ietf.org/html/rfc6381
    """
    if not codecs_str:
        return {}
    split_codecs = [
        stripped for stripped in
        (c.strip() for c in codecs_str.strip().strip(',').split(','))
        if stripped]
    VIDEO_FAMILIES = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_FAMILIES = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    vcodec = acodec = None
    for full_codec in split_codecs:
        family = full_codec.split('.')[0]
        if family in VIDEO_FAMILIES:
            if not vcodec:
                vcodec = full_codec
        elif family in AUDIO_FAMILIES:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    # Nothing recognized: with exactly two entries, assume video + audio.
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
def urlhandle_detect_ext(url_handle):
    """Guess a file extension for a response: the Content-Disposition
    filename first, then the Content-Type mime type."""
    header = url_handle.headers.get

    disposition = header('Content-Disposition')
    if disposition:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(header('Content-Type'))
def encode_data_uri(data, mime_type):
    """Encode bytes `data` as a base64 'data:' URI with `mime_type`."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:
        return False  # No limit set
    if content_limit is None:
        return False  # Content available for everyone
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # BOM table: (marker, codec); longer markers come first so that the
    # utf-32 BOMs are not mistaken for their utf-16 prefixes.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    text = None
    for marker, codec in BOMS:
        if first_bytes.startswith(marker):
            text = first_bytes[len(marker):].decode(codec, 'replace')
            break
    if text is None:
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
def determine_protocol(info_dict):
    """Work out the download protocol for `info_dict`: an explicit
    'protocol' entry wins, then a URL prefix (rtmp/mms/rtsp), then the
    file extension (m3u8/f4m), then the URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def column_widths(rows):
        # Widest cell (as text) in every column.
        return [max(len(compat_str(cell)) for cell in col) for col in zip(*rows)]

    def keep_columns(row, mask):
        return [cell for keep, cell in zip(mask, row) if keep]

    if hideEmpty:
        # Drop columns whose every data cell is empty (width 0 masks them).
        widths = column_widths(data)
        header_row = keep_columns(header_row, widths)
        data = [keep_columns(row, widths) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    if delim:
        table = [header_row] + [['-' * w for w in widths]] + data
    format_str = ' '.join('%-' + compat_str(w + extraGap) + 's' for w in widths[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
4431 def _match_one(filter_part
, dct
):
4432 COMPARISON_OPERATORS
= {
4440 operator_rex
= re
.compile(r
'''(?x)\s*
4442 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4444 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4445 (?P<quote>["\'])(?P
<quotedstrval
>(?
:\\.|
(?
!(?P
=quote
)|
\\).)+?
)(?P
=quote
)|
4446 (?P
<strval
>(?
![0-9.])[a
-z0
-9A
-Z
]*)
4449 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4450 m = operator_rex.search(filter_part)
4452 op = COMPARISON_OPERATORS[m.group('op')]
4453 actual_value = dct.get(m.group('key'))
4454 if (m.group('quotedstrval') is not None
4455 or m.group('strval') is not None
4456 # If the original field is a string and matching comparisonvalue is
4457 # a number we should respect the origin of the original field
4458 # and process comparison value as a string (see
4459 # https://github.com/ytdl-org/youtube-dl/issues/11082).
4460 or actual_value is not None and m.group('intval') is not None
4461 and isinstance(actual_value, compat_str)):
4462 if m.group('op') not in ('=', '!='):
4464 'Operator %s does not support string values!' % m.group('op'))
4465 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4466 quote = m.group('quote')
4467 if quote is not None:
4468 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4471 comparison_value = int(m.group('intval'))
4473 comparison_value = parse_filesize(m.group('intval'))
4474 if comparison_value is None:
4475 comparison_value = parse_filesize(m.group('intval') + 'B')
4476 if comparison_value is None:
4478 'Invalid integer value %r in filter part %r' % (
4479 m.group('intval'), filter_part))
4480 if actual_value is None:
4481 return m.group('none_inclusive')
4482 return op(actual_value, comparison_value)
4485 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4486 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4488 operator_rex = re.compile(r'''(?x
)\s
*
4489 (?P
<op
>%s)\s
*(?P
<key
>[a
-z_
]+)
4491 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4492 m = operator_rex.search(filter_part)
4494 op = UNARY_OPERATORS[m.group('op')]
4495 actual_value = dct.get(m.group('key'))
4496 return op(actual_value)
4498 raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    parts = filter_str.split('&')
    return all(_match_one(part, dct) for part in parts)
def match_filter_func(filter_str):
    """Build a --match-filter callable: returns None when the video passes
    the filter, otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression ('12.5s' or 'HH:MM:SS.mmm') into
    seconds (float); returns None for empty or unrecognized input."""
    if not time_expr:
        return None

    m = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if m:
        return float(m.group('time_offset'))

    m = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if m:
        hours, minutes = int(m.group(1)), int(m.group(2))
        seconds = float(m.group(3).replace(':', '.'))
        return 3600 * hours + 60 * minutes + seconds
def srt_subtitles_timecode(seconds):
    """Format a second offset as an SRT timecode: HH:MM:SS,mmm."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4535 def dfxp2srt(dfxp_data):
4537 @param dfxp_data A
bytes-like
object containing DFXP data
4538 @returns A
unicode object containing converted SRT data
4540 LEGACY_NAMESPACES = (
4541 (b'http://www.w3.org/ns/ttml', [
4542 b'http://www.w3.org/2004/11/ttaf1',
4543 b'http://www.w3.org/2006/04/ttaf1',
4544 b'http://www.w3.org/2006/10/ttaf1',
4546 (b'http://www.w3.org/ns/ttml#styling', [
4547 b'http://www.w3.org/ns/ttml#style',
4551 SUPPORTED_STYLING = [
4560 _x = functools.partial(xpath_with_ns, ns_map={
4561 'xml': 'http://www.w3.org/XML/1998/namespace',
4562 'ttml': 'http://www.w3.org/ns/ttml',
4563 'tts': 'http://www.w3.org/ns/ttml#styling',
4569 class TTMLPElementParser(object):
4571 _unclosed_elements = []
4572 _applied_styles = []
4574 def start(self, tag, attrib):
4575 if tag in (_x('ttml:br'), 'br'):
4578 unclosed_elements = []
4580 element_style_id = attrib.get('style')
4582 style.update(default_style)
4583 if element_style_id:
4584 style.update(styles.get(element_style_id, {}))
4585 for prop in SUPPORTED_STYLING:
4586 prop_val = attrib.get(_x('tts:' + prop))
4588 style[prop] = prop_val
4591 for k, v in sorted(style.items()):
4592 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4595 font += ' color="%s"' % v
4596 elif k == 'fontSize':
4597 font += ' size="%s"' % v
4598 elif k == 'fontFamily':
4599 font += ' face="%s"' % v
4600 elif k == 'fontWeight' and v == 'bold':
4602 unclosed_elements.append('b')
4603 elif k == 'fontStyle' and v == 'italic':
4605 unclosed_elements.append('i')
4606 elif k == 'textDecoration' and v == 'underline':
4608 unclosed_elements.append('u')
4610 self._out += '<font' + font + '>'
4611 unclosed_elements.append('font')
4613 if self._applied_styles:
4614 applied_style.update(self._applied_styles[-1])
4615 applied_style.update(style)
4616 self._applied_styles.append(applied_style)
4617 self._unclosed_elements.append(unclosed_elements)
4620 if tag not in (_x('ttml:br'), 'br'):
4621 unclosed_elements = self._unclosed_elements.pop()
4622 for element in reversed(unclosed_elements):
4623 self._out += '</%s>' % element
4624 if unclosed_elements and self._applied_styles:
4625 self._applied_styles.pop()
4627 def data(self, data):
4631 return self._out.strip()
4633 def parse_node(node):
4634 target = TTMLPElementParser()
4635 parser = xml.etree.ElementTree.XMLParser(target=target)
4636 parser.feed(xml.etree.ElementTree.tostring(node))
4637 return parser.close()
4639 for k, v in LEGACY_NAMESPACES:
4641 dfxp_data = dfxp_data.replace(ns, k)
4643 dfxp = compat_etree_fromstring(dfxp_data)
4645 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4648 raise ValueError('Invalid dfxp/TTML subtitle')
4652 for style in dfxp.findall(_x('.//ttml:style')):
4653 style_id = style.get('id') or style.get(_x('xml:id'))
4656 parent_style_id = style.get('style')
4658 if parent_style_id not in styles:
4661 styles[style_id] = styles[parent_style_id].copy()
4662 for prop in SUPPORTED_STYLING:
4663 prop_val = style.get(_x('tts:' + prop))
4665 styles.setdefault(style_id, {})[prop] = prop_val
4671 for p in ('body', 'div'):
4672 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4675 style = styles.get(ele.get('style'))
4678 default_style.update(style)
4680 for para, index in zip(paras, itertools.count(1)):
4681 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4682 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4683 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4684 if begin_time is None:
4689 end_time = begin_time + dur
4690 out.append('%d\n%s --> %s\n%s\n\n' % (
4692 srt_subtitles_timecode(begin_time),
4693 srt_subtitles_timecode(end_time),
def cli_option(params, command_option, param):
    """Return [command_option, value] when params[param] is set (truthy
    values coerced to text); [] when it is None."""
    value = params.get(param)
    if value:
        value = compat_str(value)
    return [command_option, value] if value is not None else []
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Map a boolean param to CLI args; with `separator`, emit a single
    'option<separator>value' token instead of two list items."""
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    token = true_value if param else false_value
    if separator:
        return [command_option + separator + token]
    return [command_option, token]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] iff params[param] equals expected_value."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick external-program args from `argdict` for the first of `keys`
    with a non-empty entry; legacy list/tuple input is returned as-is
    when use_compat is set."""
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)
    assert isinstance(keys, (list, tuple))

    for key_list in keys:
        if isinstance(key_list, compat_str):
            key_list = (key_list,)
        matches = [argdict.get(key.lower()) for key in key_list]
        matches = [m for m in matches if m is not None]
        if matches:
            # Flatten: every match is itself a list of arguments.
            return [arg for args in matches for arg in args]
    return default
4743 class ISO639Utils(object):
4744 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4803 'iw': 'heb', # Replaced by he in 1989 revision
4813 'in': 'ind', # Replaced by id in 1989 revision
4928 'ji': 'yid', # Replaced by yi in 1989 revision
4936 def short2long(cls, code):
4937 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4938 return cls._lang_map.get(code[:2])
4941 def long2short(cls, code):
4942 """Convert language code from ISO 639-2/T to ISO 639-1"""
4943 for short_name, long_name in cls._lang_map.items():
4944 if long_name == code:
4948 class ISO3166Utils(object):
4949 # From http://data.okfn.org/data/core/country-list
4951 'AF': 'Afghanistan',
4952 'AX': 'Åland Islands',
4955 'AS': 'American Samoa',
4960 'AG': 'Antigua and Barbuda',
4977 'BO': 'Bolivia, Plurinational State of',
4978 'BQ': 'Bonaire, Sint Eustatius and Saba',
4979 'BA': 'Bosnia and Herzegovina',
4981 'BV': 'Bouvet Island',
4983 'IO': 'British Indian Ocean Territory',
4984 'BN': 'Brunei Darussalam',
4986 'BF': 'Burkina Faso',
4992 'KY': 'Cayman Islands',
4993 'CF': 'Central African Republic',
4997 'CX': 'Christmas Island',
4998 'CC': 'Cocos (Keeling) Islands',
5002 'CD': 'Congo, the Democratic Republic of the',
5003 'CK': 'Cook Islands',
5005 'CI': 'Côte d\'Ivoire',
5010 'CZ': 'Czech Republic',
5014 'DO': 'Dominican Republic',
5017 'SV': 'El Salvador',
5018 'GQ': 'Equatorial Guinea',
5022 'FK': 'Falkland Islands (Malvinas)',
5023 'FO': 'Faroe Islands',
5027 'GF': 'French Guiana',
5028 'PF': 'French Polynesia',
5029 'TF': 'French Southern Territories',
5044 'GW': 'Guinea-Bissau',
5047 'HM': 'Heard Island and McDonald Islands',
5048 'VA': 'Holy See (Vatican City State)',
5055 'IR': 'Iran, Islamic Republic of',
5058 'IM': 'Isle of Man',
5068 'KP': 'Korea, Democratic People\'s Republic of',
5069 'KR': 'Korea, Republic of',
5072 'LA': 'Lao People\'s Democratic Republic',
5078 'LI': 'Liechtenstein',
5082 'MK': 'Macedonia, the Former Yugoslav Republic of',
5089 'MH': 'Marshall Islands',
5095 'FM': 'Micronesia, Federated States of',
5096 'MD': 'Moldova, Republic of',
5107 'NL': 'Netherlands',
5108 'NC': 'New Caledonia',
5109 'NZ': 'New Zealand',
5114 'NF': 'Norfolk Island',
5115 'MP': 'Northern Mariana Islands',
5120 'PS': 'Palestine, State of',
5122 'PG': 'Papua New Guinea',
5125 'PH': 'Philippines',
5129 'PR': 'Puerto Rico',
5133 'RU': 'Russian Federation',
5135 'BL': 'Saint Barthélemy',
5136 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5137 'KN': 'Saint Kitts and Nevis',
5138 'LC': 'Saint Lucia',
5139 'MF': 'Saint Martin (French part)',
5140 'PM': 'Saint Pierre and Miquelon',
5141 'VC': 'Saint Vincent and the Grenadines',
5144 'ST': 'Sao Tome and Principe',
5145 'SA': 'Saudi Arabia',
5149 'SL': 'Sierra Leone',
5151 'SX': 'Sint Maarten (Dutch part)',
5154 'SB': 'Solomon Islands',
5156 'ZA': 'South Africa',
5157 'GS': 'South Georgia and the South Sandwich Islands',
5158 'SS': 'South Sudan',
5163 'SJ': 'Svalbard and Jan Mayen',
5166 'CH': 'Switzerland',
5167 'SY': 'Syrian Arab Republic',
5168 'TW': 'Taiwan, Province of China',
5170 'TZ': 'Tanzania, United Republic of',
5172 'TL': 'Timor-Leste',
5176 'TT': 'Trinidad and Tobago',
5179 'TM': 'Turkmenistan',
5180 'TC': 'Turks and Caicos Islands',
5184 'AE': 'United Arab Emirates',
5185 'GB': 'United Kingdom',
5186 'US': 'United States',
5187 'UM': 'United States Minor Outlying Islands',
5191 'VE': 'Venezuela, Bolivarian Republic of',
5193 'VG': 'Virgin Islands, British',
5194 'VI': 'Virgin Islands, U.S.',
5195 'WF': 'Wallis and Futuna',
5196 'EH': 'Western Sahara',
5203 def short2full(cls, code):
5204 """Convert an ISO 3166-2 country code to the corresponding full name"""
5205 return cls._country_map.get(code.upper())
5208 class GeoUtils(object):
5209 # Major IPv4 address blocks per country
5211 'AD': '46.172.224.0/19',
5212 'AE': '94.200.0.0/13',
5213 'AF': '149.54.0.0/17',
5214 'AG': '209.59.64.0/18',
5215 'AI': '204.14.248.0/21',
5216 'AL': '46.99.0.0/16',
5217 'AM': '46.70.0.0/15',
5218 'AO': '105.168.0.0/13',
5219 'AP': '182.50.184.0/21',
5220 'AQ': '23.154.160.0/24',
5221 'AR': '181.0.0.0/12',
5222 'AS': '202.70.112.0/20',
5223 'AT': '77.116.0.0/14',
5224 'AU': '1.128.0.0/11',
5225 'AW': '181.41.0.0/18',
5226 'AX': '185.217.4.0/22',
5227 'AZ': '5.197.0.0/16',
5228 'BA': '31.176.128.0/17',
5229 'BB': '65.48.128.0/17',
5230 'BD': '114.130.0.0/16',
5232 'BF': '102.178.0.0/15',
5233 'BG': '95.42.0.0/15',
5234 'BH': '37.131.0.0/17',
5235 'BI': '154.117.192.0/18',
5236 'BJ': '137.255.0.0/16',
5237 'BL': '185.212.72.0/23',
5238 'BM': '196.12.64.0/18',
5239 'BN': '156.31.0.0/16',
5240 'BO': '161.56.0.0/16',
5241 'BQ': '161.0.80.0/20',
5242 'BR': '191.128.0.0/12',
5243 'BS': '24.51.64.0/18',
5244 'BT': '119.2.96.0/19',
5245 'BW': '168.167.0.0/16',
5246 'BY': '178.120.0.0/13',
5247 'BZ': '179.42.192.0/18',
5248 'CA': '99.224.0.0/11',
5249 'CD': '41.243.0.0/16',
5250 'CF': '197.242.176.0/21',
5251 'CG': '160.113.0.0/16',
5252 'CH': '85.0.0.0/13',
5253 'CI': '102.136.0.0/14',
5254 'CK': '202.65.32.0/19',
5255 'CL': '152.172.0.0/14',
5256 'CM': '102.244.0.0/14',
5257 'CN': '36.128.0.0/10',
5258 'CO': '181.240.0.0/12',
5259 'CR': '201.192.0.0/12',
5260 'CU': '152.206.0.0/15',
5261 'CV': '165.90.96.0/19',
5262 'CW': '190.88.128.0/17',
5263 'CY': '31.153.0.0/16',
5264 'CZ': '88.100.0.0/14',
5266 'DJ': '197.241.0.0/17',
5267 'DK': '87.48.0.0/12',
5268 'DM': '192.243.48.0/20',
5269 'DO': '152.166.0.0/15',
5270 'DZ': '41.96.0.0/12',
5271 'EC': '186.68.0.0/15',
5272 'EE': '90.190.0.0/15',
5273 'EG': '156.160.0.0/11',
5274 'ER': '196.200.96.0/20',
5275 'ES': '88.0.0.0/11',
5276 'ET': '196.188.0.0/14',
5277 'EU': '2.16.0.0/13',
5278 'FI': '91.152.0.0/13',
5279 'FJ': '144.120.0.0/16',
5280 'FK': '80.73.208.0/21',
5281 'FM': '119.252.112.0/20',
5282 'FO': '88.85.32.0/19',
5284 'GA': '41.158.0.0/15',
5286 'GD': '74.122.88.0/21',
5287 'GE': '31.146.0.0/16',
5288 'GF': '161.22.64.0/18',
5289 'GG': '62.68.160.0/19',
5290 'GH': '154.160.0.0/12',
5291 'GI': '95.164.0.0/16',
5292 'GL': '88.83.0.0/19',
5293 'GM': '160.182.0.0/15',
5294 'GN': '197.149.192.0/18',
5295 'GP': '104.250.0.0/19',
5296 'GQ': '105.235.224.0/20',
5297 'GR': '94.64.0.0/13',
5298 'GT': '168.234.0.0/16',
5299 'GU': '168.123.0.0/16',
5300 'GW': '197.214.80.0/20',
5301 'GY': '181.41.64.0/18',
5302 'HK': '113.252.0.0/14',
5303 'HN': '181.210.0.0/16',
5304 'HR': '93.136.0.0/13',
5305 'HT': '148.102.128.0/17',
5306 'HU': '84.0.0.0/14',
5307 'ID': '39.192.0.0/10',
5308 'IE': '87.32.0.0/12',
5309 'IL': '79.176.0.0/13',
5310 'IM': '5.62.80.0/20',
5311 'IN': '117.192.0.0/10',
5312 'IO': '203.83.48.0/21',
5313 'IQ': '37.236.0.0/14',
5314 'IR': '2.176.0.0/12',
5315 'IS': '82.221.0.0/16',
5316 'IT': '79.0.0.0/10',
5317 'JE': '87.244.64.0/18',
5318 'JM': '72.27.0.0/17',
5319 'JO': '176.29.0.0/16',
5320 'JP': '133.0.0.0/8',
5321 'KE': '105.48.0.0/12',
5322 'KG': '158.181.128.0/17',
5323 'KH': '36.37.128.0/17',
5324 'KI': '103.25.140.0/22',
5325 'KM': '197.255.224.0/20',
5326 'KN': '198.167.192.0/19',
5327 'KP': '175.45.176.0/22',
5328 'KR': '175.192.0.0/10',
5329 'KW': '37.36.0.0/14',
5330 'KY': '64.96.0.0/15',
5331 'KZ': '2.72.0.0/13',
5332 'LA': '115.84.64.0/18',
5333 'LB': '178.135.0.0/16',
5334 'LC': '24.92.144.0/20',
5335 'LI': '82.117.0.0/19',
5336 'LK': '112.134.0.0/15',
5337 'LR': '102.183.0.0/16',
5338 'LS': '129.232.0.0/17',
5339 'LT': '78.56.0.0/13',
5340 'LU': '188.42.0.0/16',
5341 'LV': '46.109.0.0/16',
5342 'LY': '41.252.0.0/14',
5343 'MA': '105.128.0.0/11',
5344 'MC': '88.209.64.0/18',
5345 'MD': '37.246.0.0/16',
5346 'ME': '178.175.0.0/17',
5347 'MF': '74.112.232.0/21',
5348 'MG': '154.126.0.0/17',
5349 'MH': '117.103.88.0/21',
5350 'MK': '77.28.0.0/15',
5351 'ML': '154.118.128.0/18',
5352 'MM': '37.111.0.0/17',
5353 'MN': '49.0.128.0/17',
5354 'MO': '60.246.0.0/16',
5355 'MP': '202.88.64.0/20',
5356 'MQ': '109.203.224.0/19',
5357 'MR': '41.188.64.0/18',
5358 'MS': '208.90.112.0/22',
5359 'MT': '46.11.0.0/16',
5360 'MU': '105.16.0.0/12',
5361 'MV': '27.114.128.0/18',
5362 'MW': '102.70.0.0/15',
5363 'MX': '187.192.0.0/11',
5364 'MY': '175.136.0.0/13',
5365 'MZ': '197.218.0.0/15',
5366 'NA': '41.182.0.0/16',
5367 'NC': '101.101.0.0/18',
5368 'NE': '197.214.0.0/18',
5369 'NF': '203.17.240.0/22',
5370 'NG': '105.112.0.0/12',
5371 'NI': '186.76.0.0/15',
5372 'NL': '145.96.0.0/11',
5373 'NO': '84.208.0.0/13',
5374 'NP': '36.252.0.0/15',
5375 'NR': '203.98.224.0/19',
5376 'NU': '49.156.48.0/22',
5377 'NZ': '49.224.0.0/14',
5378 'OM': '5.36.0.0/15',
5379 'PA': '186.72.0.0/15',
5380 'PE': '186.160.0.0/14',
5381 'PF': '123.50.64.0/18',
5382 'PG': '124.240.192.0/19',
5383 'PH': '49.144.0.0/13',
5384 'PK': '39.32.0.0/11',
5385 'PL': '83.0.0.0/11',
5386 'PM': '70.36.0.0/20',
5387 'PR': '66.50.0.0/16',
5388 'PS': '188.161.0.0/16',
5389 'PT': '85.240.0.0/13',
5390 'PW': '202.124.224.0/20',
5391 'PY': '181.120.0.0/14',
5392 'QA': '37.210.0.0/15',
5393 'RE': '102.35.0.0/16',
5394 'RO': '79.112.0.0/13',
5395 'RS': '93.86.0.0/15',
5396 'RU': '5.136.0.0/13',
5397 'RW': '41.186.0.0/16',
5398 'SA': '188.48.0.0/13',
5399 'SB': '202.1.160.0/19',
5400 'SC': '154.192.0.0/11',
5401 'SD': '102.120.0.0/13',
5402 'SE': '78.64.0.0/12',
5403 'SG': '8.128.0.0/10',
5404 'SI': '188.196.0.0/14',
5405 'SK': '78.98.0.0/15',
5406 'SL': '102.143.0.0/17',
5407 'SM': '89.186.32.0/19',
5408 'SN': '41.82.0.0/15',
5409 'SO': '154.115.192.0/18',
5410 'SR': '186.179.128.0/17',
5411 'SS': '105.235.208.0/21',
5412 'ST': '197.159.160.0/19',
5413 'SV': '168.243.0.0/16',
5414 'SX': '190.102.0.0/20',
5416 'SZ': '41.84.224.0/19',
5417 'TC': '65.255.48.0/20',
5418 'TD': '154.68.128.0/19',
5419 'TG': '196.168.0.0/14',
5420 'TH': '171.96.0.0/13',
5421 'TJ': '85.9.128.0/18',
5422 'TK': '27.96.24.0/21',
5423 'TL': '180.189.160.0/20',
5424 'TM': '95.85.96.0/19',
5425 'TN': '197.0.0.0/11',
5426 'TO': '175.176.144.0/21',
5427 'TR': '78.160.0.0/11',
5428 'TT': '186.44.0.0/15',
5429 'TV': '202.2.96.0/19',
5430 'TW': '120.96.0.0/11',
5431 'TZ': '156.156.0.0/14',
5432 'UA': '37.52.0.0/14',
5433 'UG': '102.80.0.0/13',
5435 'UY': '167.56.0.0/13',
5436 'UZ': '84.54.64.0/18',
5437 'VA': '212.77.0.0/19',
5438 'VC': '207.191.240.0/21',
5439 'VE': '186.88.0.0/13',
5440 'VG': '66.81.192.0/20',
5441 'VI': '146.226.0.0/16',
5442 'VN': '14.160.0.0/11',
5443 'VU': '202.80.32.0/20',
5444 'WF': '117.20.32.0/21',
5445 'WS': '202.4.32.0/19',
5446 'YE': '134.35.0.0/16',
5447 'YT': '41.242.116.0/22',
5448 'ZA': '41.0.0.0/11',
5449 'ZM': '102.144.0.0/13',
5450 'ZW': '102.177.192.0/18',
5454 def random_ipv4(cls, code_or_block):
5455 if len(code_or_block) == 2:
5456 block = cls._country_ip_map.get(code_or_block.upper())
5460 block = code_or_block
5461 addr, preflen = block.split('/')
5462 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5463 addr_max = addr_min | (0xffffffff >> int(preflen))
5464 return compat_str(socket.inet_ntoa(
5465 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header.

    '__noproxy__' disables proxying entirely; socks-scheme proxies are
    forwarded to the http/https handlers via 'Ytdl-socks-proxy'.
    """

    def __init__(self, proxies=None):
        # Install http/https openers that route through proxy_open.
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme,
                    lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5493 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5494 # released into Public Domain
5495 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of
    the byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # Emit 32 bits at a time, most-significant chunk first.
    out = b''
    n = int(n)
    while n > 0:
        out = compat_struct_pack('>I', n & 0xffffffff) + out
        n = n >> 32
    # Strip leading zero bytes; keep a single zero byte when n was 0.
    idx = 0
    for idx in range(len(out)):
        if out[idx] != b'\000'[0]:
            break
    else:
        out = b'\000'
        idx = 0
    out = out[idx:]
    # Left-pad with zeros up to a multiple of blocksize.
    if blocksize > 0 and len(out) % blocksize:
        out = (blocksize - len(out) % blocksize) * b'\000' + out
    return out
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    length = len(s)
    if length % 4:
        # Left-pad with zero bytes so the length is a multiple of 4.
        extra = 4 - length % 4
        s = b'\000' * extra + s
        length += extra
    for offset in range(0, length, 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[offset:offset + 4])[0]
    return acc
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # OHDave treats the data as a little-endian integer, hence the reversal.
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
def pkcs1pad(data, length):
    """
    Pad input data to `length` with the PKCS#1 v1.5 type-2 scheme
    (RFC 8017, EME-PKCS1-v1_5): 0x00 0x02 <nonzero padding> 0x00 <data>.

    @param {int[]} data        input data
    @param {int} length        target length
    @returns {int[]}           padded data

    Raises ValueError when `data` leaves no room for the minimum 11 bytes
    of padding.
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 requires the padding string PS to consist of NONZERO octets:
    # a zero byte would be taken as the padding terminator by the decrypter
    # (the previous randint(0, 254) could emit zeros).
    pseudo_random = [random.randint(1, 254) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Encode non-negative integer `num` in base `n`, using `table` as the
    digit alphabet (defaults to 0-9a-zA-Z truncated to `n` symbols)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Collect least-significant digits first, then reverse.
    digits = []
    while num:
        digits.append(table[num % n])
        num = num // n
    return ''.join(reversed(digits))
def decode_packed_codes(code):
    """Unpack JavaScript obfuscated with the common P.A.C.K.E.R. encoder."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    # Map each base-n token back to its replacement symbol (a token whose
    # slot in the symbol list is empty maps to itself).
    for index in range(count - 1, -1, -1):
        token = encode_base_n(index, base)
        symbol_table[token] = symbols[index] or token

    return re.sub(
        r'\b(\w+)\b', lambda match: symbol_table[match.group(0)],
        obfuscated_code)
def caesar(s, alphabet, shift):
    """Shift every character of `s` found in `alphabet` by `shift` positions
    (wrapping around); characters outside `alphabet` pass through."""
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for ch in s:
        if ch in alphabet:
            shifted.append(alphabet[(alphabet.index(ch) + shift) % size])
        else:
            shifted.append(ch)
    return ''.join(shifted)
def rot47(s):
    """Apply the ROT47 cipher: a Caesar shift of 47 over the 94 printable
    ASCII characters ('!' through '~')."""
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list into a dict, stripping the surrounding
    double quotes from quoted values."""
    parsed = {}
    pairs = re.findall(
        r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    for key, raw in pairs:
        parsed[key] = raw[1:-1] if raw.startswith('"') else raw
    return parsed
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's `>>>`): negative values are
    first mapped to their unsigned 32-bit representation."""
    if val >= 0:
        return val >> n
    return (val + 0x100000000) >> n
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG image bytes into a (width, height, pixels) tuple.

    pixels is a list of scanlines, each a flat list of unfiltered byte
    values. The code uses a stride of width * 3, i.e. it assumes 3 bytes
    per pixel (8-bit RGB truecolour, non-interlaced) -- other colour types
    are not handled.
    Reference: https://www.w3.org/TR/PNG/

    Raises IOError when the signature/IHDR is wrong or no IDAT data exists.
    """
    header = png_data[8:]  # everything after the 8-byte PNG signature

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Chunk integers are big-endian; pick the struct format by byte length.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, data, 4-byte CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is always the first chunk; width/height are its first two fields.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Concatenate all IDAT chunks: together they form a single zlib stream.
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # bytes per scanline (3 channel bytes per pixel)
    pixels = []

    def _get_pixel(idx):
        # Look up an already-reconstructed byte by absolute byte index.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each scanline is prefixed by one filter-type byte, hence 1 + stride.
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbours used by the reconstruction filters; 0 at borders.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Paeth predictor: pick the neighbour closest to p.
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
def write_xattr(path, key, value):
    """Set extended attribute `key` to the bytes `value` on file `path`.

    Tries, in order: the pyxattr/xattr Python modules, NTFS Alternate Data
    Streams on Windows, then the setfattr/xattr command-line tools.
    Raises XAttrMetadataError when a write attempt fails, and
    XAttrUnavailableError when no usable implementation exists.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:
                # The CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the year/month/day of a
    random date between 1950-01-01 and 1995-12-31, as decimal strings."""
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    chosen = earliest + datetime.timedelta(random.randint(0, span_days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
5841 # Templates for internet shortcut files, which are plain text files.
5842 DOT_URL_LINK_TEMPLATE
= '''
5847 DOT_WEBLOC_LINK_TEMPLATE
= '''
5848 <?xml version="1.0" encoding="UTF-8"?>
5849 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5850 <plist version="1.0">
5853 \t<string>%(url)s</string>
5858 DOT_DESKTOP_LINK_TEMPLATE
= '''
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """
    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` values below list the characters that must NOT be
    # percent-encoded; everything else except letters, digits and '_.-'
    # is percent-encoded with an underlying UTF-8 encoding, while spans
    # that are already percent-encoded are left untouched.
    # Source: https://url.spec.whatwg.org/#percent-encoded-bytes

    netloc = ''
    if iri_parts.username:
        netloc += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            netloc += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        netloc += '@'

    # Punycode for Unicode hostnames; the 'idna' encoding produces ASCII.
    netloc += iri_parts.hostname.encode('idna').decode('utf-8')
    if iri_parts.port is not None and iri_parts.port != 80:
        netloc += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            netloc,
            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
            # Legacy way of handling parameters; `safe` mirrors the path's.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
            # The spec does not explicitly cover the query component, so the
            # `safe` set here is a best effort.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
def to_high_limit_path(path):
    """On Windows/Cygwin, prefix the absolute form of `path` with `\\\\?\\` to
    work around the MAX_PATH limitation (individual path segments may still
    be length-limited); elsewhere, return `path` unchanged."""
    if sys.platform not in ['win32', 'cygwin']:
        return path
    return r'\\?\ '.rstrip() + os.path.abspath(path)
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch obj[field] and render it through `template`; values listed in
    `ignore` yield `default` instead. `func`, when given, transforms the
    value before formatting (skipped for ignored values)."""
    val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    if val in ignore:
        return default
    return template % val
def clean_podcast_url(url):
    """Strip known podcast tracking/measurement redirect prefixes from `url`."""
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID string.

    Each 'x' placeholder becomes a random hex digit, the version nibble is
    a literal '4', and the variant nibble ('y') is drawn from 8/9/a/b as
    RFC 4122 requires. (The previous implementation filled 'y' with any
    hex digit, producing an invalid variant field about half the time.)
    """
    return re.sub(
        r'[xy]',
        lambda m: random.choice('89ab') if m.group(0) == 'y' else _HEX_TABLE[random.randint(0, 15)],
        'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """Ensure the parent directory of `path` exists.

    Returns True on success (or when there is nothing to create), False on
    failure. On failure the error is reported through `to_screen` when a
    callable is supplied.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Bug fix: the original guard was `callable(to_screen) is not None`,
        # which is always True (callable() returns a bool), so a None
        # to_screen was invoked and raised TypeError instead of returning
        # False.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
def get_executable_path():
    """Return the absolute directory the program runs from, covering
    PyInstaller bundles, zipped installs and plain source checkouts."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):
        # Running from PyInstaller: sys.executable is the bundled binary.
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):
        # Running from ZIP: this file sits two directory levels inside it.
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
def load_plugins(name, type, namespace):
    """Load classes whose names end with `type` from the ytdlp_plugins
    module `name`, registering each into `namespace`; returns the list of
    loaded classes (empty when the plugin module does not exist)."""
    plugin_info = [None]
    classes = []
    try:
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        # NOTE: `name` is deliberately rebound here; namespace[name] below
        # uses the attribute name, not the module name.
        for name in dir(plugins):
            if name.endswith(type):
                klass = getattr(plugins, name)
                classes.append(klass)
                namespace[name] = klass
    except ImportError:
        pass
    finally:
        # imp.find_module returns an open file handle as its first element;
        # close it whether or not loading succeeded.
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
def traverse_dict(dictn, keys, casesense=True):
    """Walk nested dicts following `keys`, returning None as soon as a
    lookup fails; with casesense=False, keys compare case-insensitively."""
    if not isinstance(dictn, dict):
        return None
    first_key = keys[0]
    if not casesense:
        dictn = {key.lower(): val for key, val in dictn.items()}
        first_key = first_key.lower()
    value = dictn.get(first_key, None)
    if len(keys) < 2:
        return value
    return traverse_dict(value, keys[1:], casesense)