youtube_dlc/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import imp
  20 import io
  21 import itertools
  22 import json
  23 import locale
  24 import math
  25 import operator
  26 import os
  27 import platform
  28 import random
  29 import re
  30 import socket
  31 import ssl
  32 import subprocess
  33 import sys
  34 import tempfile
  35 import time
  36 import traceback
  37 import xml.etree.ElementTree
  38 import zlib
  39
  40 from .compat import (
  41     compat_HTMLParseError,
  42     compat_HTMLParser,
  43     compat_basestring,
  44     compat_chr,
  45     compat_cookiejar,
  46     compat_ctypes_WINFUNCTYPE,
  47     compat_etree_fromstring,
  48     compat_expanduser,
  49     compat_html_entities,
  50     compat_html_entities_html5,
  51     compat_http_client,
  52     compat_integer_types,
  53     compat_numeric_types,
  54     compat_kwargs,
  55     compat_os_name,
  56     compat_parse_qs,
  57     compat_shlex_quote,
  58     compat_str,
  59     compat_struct_pack,
  60     compat_struct_unpack,
  61     compat_urllib_error,
  62     compat_urllib_parse,
  63     compat_urllib_parse_urlencode,
  64     compat_urllib_parse_urlparse,
  65     compat_urllib_parse_urlunparse,
  66     compat_urllib_parse_quote,
  67     compat_urllib_parse_quote_plus,
  68     compat_urllib_parse_unquote_plus,
  69     compat_urllib_request,
  70     compat_urlparse,
  71     compat_xpath,
  72 )
  73
  74 from .socks import (
  75     ProxyType,
  76     sockssocket,
  77 )
  78
  79
  80 def register_socks_protocols():
  81     # "Register" SOCKS protocols
  82     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  83     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
  84     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  85         if scheme not in compat_urlparse.uses_netloc:
  86             compat_urlparse.uses_netloc.append(scheme)
  87
  88
  89 # This is not clearly defined otherwise
  90 compiled_regex_type = type(re.compile(''))
  91
  92
  93 def random_user_agent():
  94     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  95     _CHROME_VERSIONS = (
  96         '74.0.3729.129',
  97         '76.0.3780.3',
  98         '76.0.3780.2',
  99         '74.0.3729.128',
 100         '76.0.3780.1',
 101         '76.0.3780.0',
 102         '75.0.3770.15',
 103         '74.0.3729.127',
 104         '74.0.3729.126',
 105         '76.0.3779.1',
 106         '76.0.3779.0',
 107         '75.0.3770.14',
 108         '74.0.3729.125',
 109         '76.0.3778.1',
 110         '76.0.3778.0',
 111         '75.0.3770.13',
 112         '74.0.3729.124',
 113         '74.0.3729.123',
 114         '73.0.3683.121',
 115         '76.0.3777.1',
 116         '76.0.3777.0',
 117         '75.0.3770.12',
 118         '74.0.3729.122',
 119         '76.0.3776.4',
 120         '75.0.3770.11',
 121         '74.0.3729.121',
 122         '76.0.3776.3',
 123         '76.0.3776.2',
 124         '73.0.3683.120',
 125         '74.0.3729.120',
 126         '74.0.3729.119',
 127         '74.0.3729.118',
 128         '76.0.3776.1',
 129         '76.0.3776.0',
 130         '76.0.3775.5',
 131         '75.0.3770.10',
 132         '74.0.3729.117',
 133         '76.0.3775.4',
 134         '76.0.3775.3',
 135         '74.0.3729.116',
 136         '75.0.3770.9',
 137         '76.0.3775.2',
 138         '76.0.3775.1',
 139         '76.0.3775.0',
 140         '75.0.3770.8',
 141         '74.0.3729.115',
 142         '74.0.3729.114',
 143         '76.0.3774.1',
 144         '76.0.3774.0',
 145         '75.0.3770.7',
 146         '74.0.3729.113',
 147         '74.0.3729.112',
 148         '74.0.3729.111',
 149         '76.0.3773.1',
 150         '76.0.3773.0',
 151         '75.0.3770.6',
 152         '74.0.3729.110',
 153         '74.0.3729.109',
 154         '76.0.3772.1',
 155         '76.0.3772.0',
 156         '75.0.3770.5',
 157         '74.0.3729.108',
 158         '74.0.3729.107',
 159         '76.0.3771.1',
 160         '76.0.3771.0',
 161         '75.0.3770.4',
 162         '74.0.3729.106',
 163         '74.0.3729.105',
 164         '75.0.3770.3',
 165         '74.0.3729.104',
 166         '74.0.3729.103',
 167         '74.0.3729.102',
 168         '75.0.3770.2',
 169         '74.0.3729.101',
 170         '75.0.3770.1',
 171         '75.0.3770.0',
 172         '74.0.3729.100',
 173         '75.0.3769.5',
 174         '75.0.3769.4',
 175         '74.0.3729.99',
 176         '75.0.3769.3',
 177         '75.0.3769.2',
 178         '75.0.3768.6',
 179         '74.0.3729.98',
 180         '75.0.3769.1',
 181         '75.0.3769.0',
 182         '74.0.3729.97',
 183         '73.0.3683.119',
 184         '73.0.3683.118',
 185         '74.0.3729.96',
 186         '75.0.3768.5',
 187         '75.0.3768.4',
 188         '75.0.3768.3',
 189         '75.0.3768.2',
 190         '74.0.3729.95',
 191         '74.0.3729.94',
 192         '75.0.3768.1',
 193         '75.0.3768.0',
 194         '74.0.3729.93',
 195         '74.0.3729.92',
 196         '73.0.3683.117',
 197         '74.0.3729.91',
 198         '75.0.3766.3',
 199         '74.0.3729.90',
 200         '75.0.3767.2',
 201         '75.0.3767.1',
 202         '75.0.3767.0',
 203         '74.0.3729.89',
 204         '73.0.3683.116',
 205         '75.0.3766.2',
 206         '74.0.3729.88',
 207         '75.0.3766.1',
 208         '75.0.3766.0',
 209         '74.0.3729.87',
 210         '73.0.3683.115',
 211         '74.0.3729.86',
 212         '75.0.3765.1',
 213         '75.0.3765.0',
 214         '74.0.3729.85',
 215         '73.0.3683.114',
 216         '74.0.3729.84',
 217         '75.0.3764.1',
 218         '75.0.3764.0',
 219         '74.0.3729.83',
 220         '73.0.3683.113',
 221         '75.0.3763.2',
 222         '75.0.3761.4',
 223         '74.0.3729.82',
 224         '75.0.3763.1',
 225         '75.0.3763.0',
 226         '74.0.3729.81',
 227         '73.0.3683.112',
 228         '75.0.3762.1',
 229         '75.0.3762.0',
 230         '74.0.3729.80',
 231         '75.0.3761.3',
 232         '74.0.3729.79',
 233         '73.0.3683.111',
 234         '75.0.3761.2',
 235         '74.0.3729.78',
 236         '74.0.3729.77',
 237         '75.0.3761.1',
 238         '75.0.3761.0',
 239         '73.0.3683.110',
 240         '74.0.3729.76',
 241         '74.0.3729.75',
 242         '75.0.3760.0',
 243         '74.0.3729.74',
 244         '75.0.3759.8',
 245         '75.0.3759.7',
 246         '75.0.3759.6',
 247         '74.0.3729.73',
 248         '75.0.3759.5',
 249         '74.0.3729.72',
 250         '73.0.3683.109',
 251         '75.0.3759.4',
 252         '75.0.3759.3',
 253         '74.0.3729.71',
 254         '75.0.3759.2',
 255         '74.0.3729.70',
 256         '73.0.3683.108',
 257         '74.0.3729.69',
 258         '75.0.3759.1',
 259         '75.0.3759.0',
 260         '74.0.3729.68',
 261         '73.0.3683.107',
 262         '74.0.3729.67',
 263         '75.0.3758.1',
 264         '75.0.3758.0',
 265         '74.0.3729.66',
 266         '73.0.3683.106',
 267         '74.0.3729.65',
 268         '75.0.3757.1',
 269         '75.0.3757.0',
 270         '74.0.3729.64',
 271         '73.0.3683.105',
 272         '74.0.3729.63',
 273         '75.0.3756.1',
 274         '75.0.3756.0',
 275         '74.0.3729.62',
 276         '73.0.3683.104',
 277         '75.0.3755.3',
 278         '75.0.3755.2',
 279         '73.0.3683.103',
 280         '75.0.3755.1',
 281         '75.0.3755.0',
 282         '74.0.3729.61',
 283         '73.0.3683.102',
 284         '74.0.3729.60',
 285         '75.0.3754.2',
 286         '74.0.3729.59',
 287         '75.0.3753.4',
 288         '74.0.3729.58',
 289         '75.0.3754.1',
 290         '75.0.3754.0',
 291         '74.0.3729.57',
 292         '73.0.3683.101',
 293         '75.0.3753.3',
 294         '75.0.3752.2',
 295         '75.0.3753.2',
 296         '74.0.3729.56',
 297         '75.0.3753.1',
 298         '75.0.3753.0',
 299         '74.0.3729.55',
 300         '73.0.3683.100',
 301         '74.0.3729.54',
 302         '75.0.3752.1',
 303         '75.0.3752.0',
 304         '74.0.3729.53',
 305         '73.0.3683.99',
 306         '74.0.3729.52',
 307         '75.0.3751.1',
 308         '75.0.3751.0',
 309         '74.0.3729.51',
 310         '73.0.3683.98',
 311         '74.0.3729.50',
 312         '75.0.3750.0',
 313         '74.0.3729.49',
 314         '74.0.3729.48',
 315         '74.0.3729.47',
 316         '75.0.3749.3',
 317         '74.0.3729.46',
 318         '73.0.3683.97',
 319         '75.0.3749.2',
 320         '74.0.3729.45',
 321         '75.0.3749.1',
 322         '75.0.3749.0',
 323         '74.0.3729.44',
 324         '73.0.3683.96',
 325         '74.0.3729.43',
 326         '74.0.3729.42',
 327         '75.0.3748.1',
 328         '75.0.3748.0',
 329         '74.0.3729.41',
 330         '75.0.3747.1',
 331         '73.0.3683.95',
 332         '75.0.3746.4',
 333         '74.0.3729.40',
 334         '74.0.3729.39',
 335         '75.0.3747.0',
 336         '75.0.3746.3',
 337         '75.0.3746.2',
 338         '74.0.3729.38',
 339         '75.0.3746.1',
 340         '75.0.3746.0',
 341         '74.0.3729.37',
 342         '73.0.3683.94',
 343         '75.0.3745.5',
 344         '75.0.3745.4',
 345         '75.0.3745.3',
 346         '75.0.3745.2',
 347         '74.0.3729.36',
 348         '75.0.3745.1',
 349         '75.0.3745.0',
 350         '75.0.3744.2',
 351         '74.0.3729.35',
 352         '73.0.3683.93',
 353         '74.0.3729.34',
 354         '75.0.3744.1',
 355         '75.0.3744.0',
 356         '74.0.3729.33',
 357         '73.0.3683.92',
 358         '74.0.3729.32',
 359         '74.0.3729.31',
 360         '73.0.3683.91',
 361         '75.0.3741.2',
 362         '75.0.3740.5',
 363         '74.0.3729.30',
 364         '75.0.3741.1',
 365         '75.0.3741.0',
 366         '74.0.3729.29',
 367         '75.0.3740.4',
 368         '73.0.3683.90',
 369         '74.0.3729.28',
 370         '75.0.3740.3',
 371         '73.0.3683.89',
 372         '75.0.3740.2',
 373         '74.0.3729.27',
 374         '75.0.3740.1',
 375         '75.0.3740.0',
 376         '74.0.3729.26',
 377         '73.0.3683.88',
 378         '73.0.3683.87',
 379         '74.0.3729.25',
 380         '75.0.3739.1',
 381         '75.0.3739.0',
 382         '73.0.3683.86',
 383         '74.0.3729.24',
 384         '73.0.3683.85',
 385         '75.0.3738.4',
 386         '75.0.3738.3',
 387         '75.0.3738.2',
 388         '75.0.3738.1',
 389         '75.0.3738.0',
 390         '74.0.3729.23',
 391         '73.0.3683.84',
 392         '74.0.3729.22',
 393         '74.0.3729.21',
 394         '75.0.3737.1',
 395         '75.0.3737.0',
 396         '74.0.3729.20',
 397         '73.0.3683.83',
 398         '74.0.3729.19',
 399         '75.0.3736.1',
 400         '75.0.3736.0',
 401         '74.0.3729.18',
 402         '73.0.3683.82',
 403         '74.0.3729.17',
 404         '75.0.3735.1',
 405         '75.0.3735.0',
 406         '74.0.3729.16',
 407         '73.0.3683.81',
 408         '75.0.3734.1',
 409         '75.0.3734.0',
 410         '74.0.3729.15',
 411         '73.0.3683.80',
 412         '74.0.3729.14',
 413         '75.0.3733.1',
 414         '75.0.3733.0',
 415         '75.0.3732.1',
 416         '74.0.3729.13',
 417         '74.0.3729.12',
 418         '73.0.3683.79',
 419         '74.0.3729.11',
 420         '75.0.3732.0',
 421         '74.0.3729.10',
 422         '73.0.3683.78',
 423         '74.0.3729.9',
 424         '74.0.3729.8',
 425         '74.0.3729.7',
 426         '75.0.3731.3',
 427         '75.0.3731.2',
 428         '75.0.3731.0',
 429         '74.0.3729.6',
 430         '73.0.3683.77',
 431         '73.0.3683.76',
 432         '75.0.3730.5',
 433         '75.0.3730.4',
 434         '73.0.3683.75',
 435         '74.0.3729.5',
 436         '73.0.3683.74',
 437         '75.0.3730.3',
 438         '75.0.3730.2',
 439         '74.0.3729.4',
 440         '73.0.3683.73',
 441         '73.0.3683.72',
 442         '75.0.3730.1',
 443         '75.0.3730.0',
 444         '74.0.3729.3',
 445         '73.0.3683.71',
 446         '74.0.3729.2',
 447         '73.0.3683.70',
 448         '74.0.3729.1',
 449         '74.0.3729.0',
 450         '74.0.3726.4',
 451         '73.0.3683.69',
 452         '74.0.3726.3',
 453         '74.0.3728.0',
 454         '74.0.3726.2',
 455         '73.0.3683.68',
 456         '74.0.3726.1',
 457         '74.0.3726.0',
 458         '74.0.3725.4',
 459         '73.0.3683.67',
 460         '73.0.3683.66',
 461         '74.0.3725.3',
 462         '74.0.3725.2',
 463         '74.0.3725.1',
 464         '74.0.3724.8',
 465         '74.0.3725.0',
 466         '73.0.3683.65',
 467         '74.0.3724.7',
 468         '74.0.3724.6',
 469         '74.0.3724.5',
 470         '74.0.3724.4',
 471         '74.0.3724.3',
 472         '74.0.3724.2',
 473         '74.0.3724.1',
 474         '74.0.3724.0',
 475         '73.0.3683.64',
 476         '74.0.3723.1',
 477         '74.0.3723.0',
 478         '73.0.3683.63',
 479         '74.0.3722.1',
 480         '74.0.3722.0',
 481         '73.0.3683.62',
 482         '74.0.3718.9',
 483         '74.0.3702.3',
 484         '74.0.3721.3',
 485         '74.0.3721.2',
 486         '74.0.3721.1',
 487         '74.0.3721.0',
 488         '74.0.3720.6',
 489         '73.0.3683.61',
 490         '72.0.3626.122',
 491         '73.0.3683.60',
 492         '74.0.3720.5',
 493         '72.0.3626.121',
 494         '74.0.3718.8',
 495         '74.0.3720.4',
 496         '74.0.3720.3',
 497         '74.0.3718.7',
 498         '74.0.3720.2',
 499         '74.0.3720.1',
 500         '74.0.3720.0',
 501         '74.0.3718.6',
 502         '74.0.3719.5',
 503         '73.0.3683.59',
 504         '74.0.3718.5',
 505         '74.0.3718.4',
 506         '74.0.3719.4',
 507         '74.0.3719.3',
 508         '74.0.3719.2',
 509         '74.0.3719.1',
 510         '73.0.3683.58',
 511         '74.0.3719.0',
 512         '73.0.3683.57',
 513         '73.0.3683.56',
 514         '74.0.3718.3',
 515         '73.0.3683.55',
 516         '74.0.3718.2',
 517         '74.0.3718.1',
 518         '74.0.3718.0',
 519         '73.0.3683.54',
 520         '74.0.3717.2',
 521         '73.0.3683.53',
 522         '74.0.3717.1',
 523         '74.0.3717.0',
 524         '73.0.3683.52',
 525         '74.0.3716.1',
 526         '74.0.3716.0',
 527         '73.0.3683.51',
 528         '74.0.3715.1',
 529         '74.0.3715.0',
 530         '73.0.3683.50',
 531         '74.0.3711.2',
 532         '74.0.3714.2',
 533         '74.0.3713.3',
 534         '74.0.3714.1',
 535         '74.0.3714.0',
 536         '73.0.3683.49',
 537         '74.0.3713.1',
 538         '74.0.3713.0',
 539         '72.0.3626.120',
 540         '73.0.3683.48',
 541         '74.0.3712.2',
 542         '74.0.3712.1',
 543         '74.0.3712.0',
 544         '73.0.3683.47',
 545         '72.0.3626.119',
 546         '73.0.3683.46',
 547         '74.0.3710.2',
 548         '72.0.3626.118',
 549         '74.0.3711.1',
 550         '74.0.3711.0',
 551         '73.0.3683.45',
 552         '72.0.3626.117',
 553         '74.0.3710.1',
 554         '74.0.3710.0',
 555         '73.0.3683.44',
 556         '72.0.3626.116',
 557         '74.0.3709.1',
 558         '74.0.3709.0',
 559         '74.0.3704.9',
 560         '73.0.3683.43',
 561         '72.0.3626.115',
 562         '74.0.3704.8',
 563         '74.0.3704.7',
 564         '74.0.3708.0',
 565         '74.0.3706.7',
 566         '74.0.3704.6',
 567         '73.0.3683.42',
 568         '72.0.3626.114',
 569         '74.0.3706.6',
 570         '72.0.3626.113',
 571         '74.0.3704.5',
 572         '74.0.3706.5',
 573         '74.0.3706.4',
 574         '74.0.3706.3',
 575         '74.0.3706.2',
 576         '74.0.3706.1',
 577         '74.0.3706.0',
 578         '73.0.3683.41',
 579         '72.0.3626.112',
 580         '74.0.3705.1',
 581         '74.0.3705.0',
 582         '73.0.3683.40',
 583         '72.0.3626.111',
 584         '73.0.3683.39',
 585         '74.0.3704.4',
 586         '73.0.3683.38',
 587         '74.0.3704.3',
 588         '74.0.3704.2',
 589         '74.0.3704.1',
 590         '74.0.3704.0',
 591         '73.0.3683.37',
 592         '72.0.3626.110',
 593         '72.0.3626.109',
 594         '74.0.3703.3',
 595         '74.0.3703.2',
 596         '73.0.3683.36',
 597         '74.0.3703.1',
 598         '74.0.3703.0',
 599         '73.0.3683.35',
 600         '72.0.3626.108',
 601         '74.0.3702.2',
 602         '74.0.3699.3',
 603         '74.0.3702.1',
 604         '74.0.3702.0',
 605         '73.0.3683.34',
 606         '72.0.3626.107',
 607         '73.0.3683.33',
 608         '74.0.3701.1',
 609         '74.0.3701.0',
 610         '73.0.3683.32',
 611         '73.0.3683.31',
 612         '72.0.3626.105',
 613         '74.0.3700.1',
 614         '74.0.3700.0',
 615         '73.0.3683.29',
 616         '72.0.3626.103',
 617         '74.0.3699.2',
 618         '74.0.3699.1',
 619         '74.0.3699.0',
 620         '73.0.3683.28',
 621         '72.0.3626.102',
 622         '73.0.3683.27',
 623         '73.0.3683.26',
 624         '74.0.3698.0',
 625         '74.0.3696.2',
 626         '72.0.3626.101',
 627         '73.0.3683.25',
 628         '74.0.3696.1',
 629         '74.0.3696.0',
 630         '74.0.3694.8',
 631         '72.0.3626.100',
 632         '74.0.3694.7',
 633         '74.0.3694.6',
 634         '74.0.3694.5',
 635         '74.0.3694.4',
 636         '72.0.3626.99',
 637         '72.0.3626.98',
 638         '74.0.3694.3',
 639         '73.0.3683.24',
 640         '72.0.3626.97',
 641         '72.0.3626.96',
 642         '72.0.3626.95',
 643         '73.0.3683.23',
 644         '72.0.3626.94',
 645         '73.0.3683.22',
 646         '73.0.3683.21',
 647         '72.0.3626.93',
 648         '74.0.3694.2',
 649         '72.0.3626.92',
 650         '74.0.3694.1',
 651         '74.0.3694.0',
 652         '74.0.3693.6',
 653         '73.0.3683.20',
 654         '72.0.3626.91',
 655         '74.0.3693.5',
 656         '74.0.3693.4',
 657         '74.0.3693.3',
 658         '74.0.3693.2',
 659         '73.0.3683.19',
 660         '74.0.3693.1',
 661         '74.0.3693.0',
 662         '73.0.3683.18',
 663         '72.0.3626.90',
 664         '74.0.3692.1',
 665         '74.0.3692.0',
 666         '73.0.3683.17',
 667         '72.0.3626.89',
 668         '74.0.3687.3',
 669         '74.0.3691.1',
 670         '74.0.3691.0',
 671         '73.0.3683.16',
 672         '72.0.3626.88',
 673         '72.0.3626.87',
 674         '73.0.3683.15',
 675         '74.0.3690.1',
 676         '74.0.3690.0',
 677         '73.0.3683.14',
 678         '72.0.3626.86',
 679         '73.0.3683.13',
 680         '73.0.3683.12',
 681         '74.0.3689.1',
 682         '74.0.3689.0',
 683         '73.0.3683.11',
 684         '72.0.3626.85',
 685         '73.0.3683.10',
 686         '72.0.3626.84',
 687         '73.0.3683.9',
 688         '74.0.3688.1',
 689         '74.0.3688.0',
 690         '73.0.3683.8',
 691         '72.0.3626.83',
 692         '74.0.3687.2',
 693         '74.0.3687.1',
 694         '74.0.3687.0',
 695         '73.0.3683.7',
 696         '72.0.3626.82',
 697         '74.0.3686.4',
 698         '72.0.3626.81',
 699         '74.0.3686.3',
 700         '74.0.3686.2',
 701         '74.0.3686.1',
 702         '74.0.3686.0',
 703         '73.0.3683.6',
 704         '72.0.3626.80',
 705         '74.0.3685.1',
 706         '74.0.3685.0',
 707         '73.0.3683.5',
 708         '72.0.3626.79',
 709         '74.0.3684.1',
 710         '74.0.3684.0',
 711         '73.0.3683.4',
 712         '72.0.3626.78',
 713         '72.0.3626.77',
 714         '73.0.3683.3',
 715         '73.0.3683.2',
 716         '72.0.3626.76',
 717         '73.0.3683.1',
 718         '73.0.3683.0',
 719         '72.0.3626.75',
 720         '71.0.3578.141',
 721         '73.0.3682.1',
 722         '73.0.3682.0',
 723         '72.0.3626.74',
 724         '71.0.3578.140',
 725         '73.0.3681.4',
 726         '73.0.3681.3',
 727         '73.0.3681.2',
 728         '73.0.3681.1',
 729         '73.0.3681.0',
 730         '72.0.3626.73',
 731         '71.0.3578.139',
 732         '72.0.3626.72',
 733         '72.0.3626.71',
 734         '73.0.3680.1',
 735         '73.0.3680.0',
 736         '72.0.3626.70',
 737         '71.0.3578.138',
 738         '73.0.3678.2',
 739         '73.0.3679.1',
 740         '73.0.3679.0',
 741         '72.0.3626.69',
 742         '71.0.3578.137',
 743         '73.0.3678.1',
 744         '73.0.3678.0',
 745         '71.0.3578.136',
 746         '73.0.3677.1',
 747         '73.0.3677.0',
 748         '72.0.3626.68',
 749         '72.0.3626.67',
 750         '71.0.3578.135',
 751         '73.0.3676.1',
 752         '73.0.3676.0',
 753         '73.0.3674.2',
 754         '72.0.3626.66',
 755         '71.0.3578.134',
 756         '73.0.3674.1',
 757         '73.0.3674.0',
 758         '72.0.3626.65',
 759         '71.0.3578.133',
 760         '73.0.3673.2',
 761         '73.0.3673.1',
 762         '73.0.3673.0',
 763         '72.0.3626.64',
 764         '71.0.3578.132',
 765         '72.0.3626.63',
 766         '72.0.3626.62',
 767         '72.0.3626.61',
 768         '72.0.3626.60',
 769         '73.0.3672.1',
 770         '73.0.3672.0',
 771         '72.0.3626.59',
 772         '71.0.3578.131',
 773         '73.0.3671.3',
 774         '73.0.3671.2',
 775         '73.0.3671.1',
 776         '73.0.3671.0',
 777         '72.0.3626.58',
 778         '71.0.3578.130',
 779         '73.0.3670.1',
 780         '73.0.3670.0',
 781         '72.0.3626.57',
 782         '71.0.3578.129',
 783         '73.0.3669.1',
 784         '73.0.3669.0',
 785         '72.0.3626.56',
 786         '71.0.3578.128',
 787         '73.0.3668.2',
 788         '73.0.3668.1',
 789         '73.0.3668.0',
 790         '72.0.3626.55',
 791         '71.0.3578.127',
 792         '73.0.3667.2',
 793         '73.0.3667.1',
 794         '73.0.3667.0',
 795         '72.0.3626.54',
 796         '71.0.3578.126',
 797         '73.0.3666.1',
 798         '73.0.3666.0',
 799         '72.0.3626.53',
 800         '71.0.3578.125',
 801         '73.0.3665.4',
 802         '73.0.3665.3',
 803         '72.0.3626.52',
 804         '73.0.3665.2',
 805         '73.0.3664.4',
 806         '73.0.3665.1',
 807         '73.0.3665.0',
 808         '72.0.3626.51',
 809         '71.0.3578.124',
 810         '72.0.3626.50',
 811         '73.0.3664.3',
 812         '73.0.3664.2',
 813         '73.0.3664.1',
 814         '73.0.3664.0',
 815         '73.0.3663.2',
 816         '72.0.3626.49',
 817         '71.0.3578.123',
 818         '73.0.3663.1',
 819         '73.0.3663.0',
 820         '72.0.3626.48',
 821         '71.0.3578.122',
 822         '73.0.3662.1',
 823         '73.0.3662.0',
 824         '72.0.3626.47',
 825         '71.0.3578.121',
 826         '73.0.3661.1',
 827         '72.0.3626.46',
 828         '73.0.3661.0',
 829         '72.0.3626.45',
 830         '71.0.3578.120',
 831         '73.0.3660.2',
 832         '73.0.3660.1',
 833         '73.0.3660.0',
 834         '72.0.3626.44',
 835         '71.0.3578.119',
 836         '73.0.3659.1',
 837         '73.0.3659.0',
 838         '72.0.3626.43',
 839         '71.0.3578.118',
 840         '73.0.3658.1',
 841         '73.0.3658.0',
 842         '72.0.3626.42',
 843         '71.0.3578.117',
 844         '73.0.3657.1',
 845         '73.0.3657.0',
 846         '72.0.3626.41',
 847         '71.0.3578.116',
 848         '73.0.3656.1',
 849         '73.0.3656.0',
 850         '72.0.3626.40',
 851         '71.0.3578.115',
 852         '73.0.3655.1',
 853         '73.0.3655.0',
 854         '72.0.3626.39',
 855         '71.0.3578.114',
 856         '73.0.3654.1',
 857         '73.0.3654.0',
 858         '72.0.3626.38',
 859         '71.0.3578.113',
 860         '73.0.3653.1',
 861         '73.0.3653.0',
 862         '72.0.3626.37',
 863         '71.0.3578.112',
 864         '73.0.3652.1',
 865         '73.0.3652.0',
 866         '72.0.3626.36',
 867         '71.0.3578.111',
 868         '73.0.3651.1',
 869         '73.0.3651.0',
 870         '72.0.3626.35',
 871         '71.0.3578.110',
 872         '73.0.3650.1',
 873         '73.0.3650.0',
 874         '72.0.3626.34',
 875         '71.0.3578.109',
 876         '73.0.3649.1',
 877         '73.0.3649.0',
 878         '72.0.3626.33',
 879         '71.0.3578.108',
 880         '73.0.3648.2',
 881         '73.0.3648.1',
 882         '73.0.3648.0',
 883         '72.0.3626.32',
 884         '71.0.3578.107',
 885         '73.0.3647.2',
 886         '73.0.3647.1',
 887         '73.0.3647.0',
 888         '72.0.3626.31',
 889         '71.0.3578.106',
 890         '73.0.3635.3',
 891         '73.0.3646.2',
 892         '73.0.3646.1',
 893         '73.0.3646.0',
 894         '72.0.3626.30',
 895         '71.0.3578.105',
 896         '72.0.3626.29',
 897         '73.0.3645.2',
 898         '73.0.3645.1',
 899         '73.0.3645.0',
 900         '72.0.3626.28',
 901         '71.0.3578.104',
 902         '72.0.3626.27',
 903         '72.0.3626.26',
 904         '72.0.3626.25',
 905         '72.0.3626.24',
 906         '73.0.3644.0',
 907         '73.0.3643.2',
 908         '72.0.3626.23',
 909         '71.0.3578.103',
 910         '73.0.3643.1',
 911         '73.0.3643.0',
 912         '72.0.3626.22',
 913         '71.0.3578.102',
 914         '73.0.3642.1',
 915         '73.0.3642.0',
 916         '72.0.3626.21',
 917         '71.0.3578.101',
 918         '73.0.3641.1',
 919         '73.0.3641.0',
 920         '72.0.3626.20',
 921         '71.0.3578.100',
 922         '72.0.3626.19',
 923         '73.0.3640.1',
 924         '73.0.3640.0',
 925         '72.0.3626.18',
 926         '73.0.3639.1',
 927         '71.0.3578.99',
 928         '73.0.3639.0',
 929         '72.0.3626.17',
 930         '73.0.3638.2',
 931         '72.0.3626.16',
 932         '73.0.3638.1',
 933         '73.0.3638.0',
 934         '72.0.3626.15',
 935         '71.0.3578.98',
 936         '73.0.3635.2',
 937         '71.0.3578.97',
 938         '73.0.3637.1',
 939         '73.0.3637.0',
 940         '72.0.3626.14',
 941         '71.0.3578.96',
 942         '71.0.3578.95',
 943         '72.0.3626.13',
 944         '71.0.3578.94',
 945         '73.0.3636.2',
 946         '71.0.3578.93',
 947         '73.0.3636.1',
 948         '73.0.3636.0',
 949         '72.0.3626.12',
 950         '71.0.3578.92',
 951         '73.0.3635.1',
 952         '73.0.3635.0',
 953         '72.0.3626.11',
 954         '71.0.3578.91',
 955         '73.0.3634.2',
 956         '73.0.3634.1',
 957         '73.0.3634.0',
 958         '72.0.3626.10',
 959         '71.0.3578.90',
 960         '71.0.3578.89',
 961         '73.0.3633.2',
 962         '73.0.3633.1',
 963         '73.0.3633.0',
 964         '72.0.3610.4',
 965         '72.0.3626.9',
 966         '71.0.3578.88',
 967         '73.0.3632.5',
 968         '73.0.3632.4',
 969         '73.0.3632.3',
 970         '73.0.3632.2',
 971         '73.0.3632.1',
 972         '73.0.3632.0',
 973         '72.0.3626.8',
 974         '71.0.3578.87',
 975         '73.0.3631.2',
 976         '73.0.3631.1',
 977         '73.0.3631.0',
 978         '72.0.3626.7',
 979         '71.0.3578.86',
 980         '72.0.3626.6',
 981         '73.0.3630.1',
 982         '73.0.3630.0',
 983         '72.0.3626.5',
 984         '71.0.3578.85',
 985         '72.0.3626.4',
 986         '73.0.3628.3',
 987         '73.0.3628.2',
 988         '73.0.3629.1',
 989         '73.0.3629.0',
 990         '72.0.3626.3',
 991         '71.0.3578.84',
 992         '73.0.3628.1',
 993         '73.0.3628.0',
 994         '71.0.3578.83',
 995         '73.0.3627.1',
 996         '73.0.3627.0',
 997         '72.0.3626.2',
 998         '71.0.3578.82',
 999         '71.0.3578.81',
1000         '71.0.3578.80',
1001         '72.0.3626.1',
1002         '72.0.3626.0',
1003         '71.0.3578.79',
1004         '70.0.3538.124',
1005         '71.0.3578.78',
1006         '72.0.3623.4',
1007         '72.0.3625.2',
1008         '72.0.3625.1',
1009         '72.0.3625.0',
1010         '71.0.3578.77',
1011         '70.0.3538.123',
1012         '72.0.3624.4',
1013         '72.0.3624.3',
1014         '72.0.3624.2',
1015         '71.0.3578.76',
1016         '72.0.3624.1',
1017         '72.0.3624.0',
1018         '72.0.3623.3',
1019         '71.0.3578.75',
1020         '70.0.3538.122',
1021         '71.0.3578.74',
1022         '72.0.3623.2',
1023         '72.0.3610.3',
1024         '72.0.3623.1',
1025         '72.0.3623.0',
1026         '72.0.3622.3',
1027         '72.0.3622.2',
1028         '71.0.3578.73',
1029         '70.0.3538.121',
1030         '72.0.3622.1',
1031         '72.0.3622.0',
1032         '71.0.3578.72',
1033         '70.0.3538.120',
1034         '72.0.3621.1',
1035         '72.0.3621.0',
1036         '71.0.3578.71',
1037         '70.0.3538.119',
1038         '72.0.3620.1',
1039         '72.0.3620.0',
1040         '71.0.3578.70',
1041         '70.0.3538.118',
1042         '71.0.3578.69',
1043         '72.0.3619.1',
1044         '72.0.3619.0',
1045         '71.0.3578.68',
1046         '70.0.3538.117',
1047         '71.0.3578.67',
1048         '72.0.3618.1',
1049         '72.0.3618.0',
1050         '71.0.3578.66',
1051         '70.0.3538.116',
1052         '72.0.3617.1',
1053         '72.0.3617.0',
1054         '71.0.3578.65',
1055         '70.0.3538.115',
1056         '72.0.3602.3',
1057         '71.0.3578.64',
1058         '72.0.3616.1',
1059         '72.0.3616.0',
1060         '71.0.3578.63',
1061         '70.0.3538.114',
1062         '71.0.3578.62',
1063         '72.0.3615.1',
1064         '72.0.3615.0',
1065         '71.0.3578.61',
1066         '70.0.3538.113',
1067         '72.0.3614.1',
1068         '72.0.3614.0',
1069         '71.0.3578.60',
1070         '70.0.3538.112',
1071         '72.0.3613.1',
1072         '72.0.3613.0',
1073         '71.0.3578.59',
1074         '70.0.3538.111',
1075         '72.0.3612.2',
1076         '72.0.3612.1',
1077         '72.0.3612.0',
1078         '70.0.3538.110',
1079         '71.0.3578.58',
1080         '70.0.3538.109',
1081         '72.0.3611.2',
1082         '72.0.3611.1',
1083         '72.0.3611.0',
1084         '71.0.3578.57',
1085         '70.0.3538.108',
1086         '72.0.3610.2',
1087         '71.0.3578.56',
1088         '71.0.3578.55',
1089         '72.0.3610.1',
1090         '72.0.3610.0',
1091         '71.0.3578.54',
1092         '70.0.3538.107',
1093         '71.0.3578.53',
1094         '72.0.3609.3',
1095         '71.0.3578.52',
1096         '72.0.3609.2',
1097         '71.0.3578.51',
1098         '72.0.3608.5',
1099         '72.0.3609.1',
1100         '72.0.3609.0',
1101         '71.0.3578.50',
1102         '70.0.3538.106',
1103         '72.0.3608.4',
1104         '72.0.3608.3',
1105         '72.0.3608.2',
1106         '71.0.3578.49',
1107         '72.0.3608.1',
1108         '72.0.3608.0',
1109         '70.0.3538.105',
1110         '71.0.3578.48',
1111         '72.0.3607.1',
1112         '72.0.3607.0',
1113         '71.0.3578.47',
1114         '70.0.3538.104',
1115         '72.0.3606.2',
1116         '72.0.3606.1',
1117         '72.0.3606.0',
1118         '71.0.3578.46',
1119         '70.0.3538.103',
1120         '70.0.3538.102',
1121         '72.0.3605.3',
1122         '72.0.3605.2',
1123         '72.0.3605.1',
1124         '72.0.3605.0',
1125         '71.0.3578.45',
1126         '70.0.3538.101',
1127         '71.0.3578.44',
1128         '71.0.3578.43',
1129         '70.0.3538.100',
1130         '70.0.3538.99',
1131         '71.0.3578.42',
1132         '72.0.3604.1',
1133         '72.0.3604.0',
1134         '71.0.3578.41',
1135         '70.0.3538.98',
1136         '71.0.3578.40',
1137         '72.0.3603.2',
1138         '72.0.3603.1',
1139         '72.0.3603.0',
1140         '71.0.3578.39',
1141         '70.0.3538.97',
1142         '72.0.3602.2',
1143         '71.0.3578.38',
1144         '71.0.3578.37',
1145         '72.0.3602.1',
1146         '72.0.3602.0',
1147         '71.0.3578.36',
1148         '70.0.3538.96',
1149         '72.0.3601.1',
1150         '72.0.3601.0',
1151         '71.0.3578.35',
1152         '70.0.3538.95',
1153         '72.0.3600.1',
1154         '72.0.3600.0',
1155         '71.0.3578.34',
1156         '70.0.3538.94',
1157         '72.0.3599.3',
1158         '72.0.3599.2',
1159         '72.0.3599.1',
1160         '72.0.3599.0',
1161         '71.0.3578.33',
1162         '70.0.3538.93',
1163         '72.0.3598.1',
1164         '72.0.3598.0',
1165         '71.0.3578.32',
1166         '70.0.3538.87',
1167         '72.0.3597.1',
1168         '72.0.3597.0',
1169         '72.0.3596.2',
1170         '71.0.3578.31',
1171         '70.0.3538.86',
1172         '71.0.3578.30',
1173         '71.0.3578.29',
1174         '72.0.3596.1',
1175         '72.0.3596.0',
1176         '71.0.3578.28',
1177         '70.0.3538.85',
1178         '72.0.3595.2',
1179         '72.0.3591.3',
1180         '72.0.3595.1',
1181         '72.0.3595.0',
1182         '71.0.3578.27',
1183         '70.0.3538.84',
1184         '72.0.3594.1',
1185         '72.0.3594.0',
1186         '71.0.3578.26',
1187         '70.0.3538.83',
1188         '72.0.3593.2',
1189         '72.0.3593.1',
1190         '72.0.3593.0',
1191         '71.0.3578.25',
1192         '70.0.3538.82',
1193         '72.0.3589.3',
1194         '72.0.3592.2',
1195         '72.0.3592.1',
1196         '72.0.3592.0',
1197         '71.0.3578.24',
1198         '72.0.3589.2',
1199         '70.0.3538.81',
1200         '70.0.3538.80',
1201         '72.0.3591.2',
1202         '72.0.3591.1',
1203         '72.0.3591.0',
1204         '71.0.3578.23',
1205         '70.0.3538.79',
1206         '71.0.3578.22',
1207         '72.0.3590.1',
1208         '72.0.3590.0',
1209         '71.0.3578.21',
1210         '70.0.3538.78',
1211         '70.0.3538.77',
1212         '72.0.3589.1',
1213         '72.0.3589.0',
1214         '71.0.3578.20',
1215         '70.0.3538.76',
1216         '71.0.3578.19',
1217         '70.0.3538.75',
1218         '72.0.3588.1',
1219         '72.0.3588.0',
1220         '71.0.3578.18',
1221         '70.0.3538.74',
1222         '72.0.3586.2',
1223         '72.0.3587.0',
1224         '71.0.3578.17',
1225         '70.0.3538.73',
1226         '72.0.3586.1',
1227         '72.0.3586.0',
1228         '71.0.3578.16',
1229         '70.0.3538.72',
1230         '72.0.3585.1',
1231         '72.0.3585.0',
1232         '71.0.3578.15',
1233         '70.0.3538.71',
1234         '71.0.3578.14',
1235         '72.0.3584.1',
1236         '72.0.3584.0',
1237         '71.0.3578.13',
1238         '70.0.3538.70',
1239         '72.0.3583.2',
1240         '71.0.3578.12',
1241         '72.0.3583.1',
1242         '72.0.3583.0',
1243         '71.0.3578.11',
1244         '70.0.3538.69',
1245         '71.0.3578.10',
1246         '72.0.3582.0',
1247         '72.0.3581.4',
1248         '71.0.3578.9',
1249         '70.0.3538.67',
1250         '72.0.3581.3',
1251         '72.0.3581.2',
1252         '72.0.3581.1',
1253         '72.0.3581.0',
1254         '71.0.3578.8',
1255         '70.0.3538.66',
1256         '72.0.3580.1',
1257         '72.0.3580.0',
1258         '71.0.3578.7',
1259         '70.0.3538.65',
1260         '71.0.3578.6',
1261         '72.0.3579.1',
1262         '72.0.3579.0',
1263         '71.0.3578.5',
1264         '70.0.3538.64',
1265         '71.0.3578.4',
1266         '71.0.3578.3',
1267         '71.0.3578.2',
1268         '71.0.3578.1',
1269         '71.0.3578.0',
1270         '70.0.3538.63',
1271         '69.0.3497.128',
1272         '70.0.3538.62',
1273         '70.0.3538.61',
1274         '70.0.3538.60',
1275         '70.0.3538.59',
1276         '71.0.3577.1',
1277         '71.0.3577.0',
1278         '70.0.3538.58',
1279         '69.0.3497.127',
1280         '71.0.3576.2',
1281         '71.0.3576.1',
1282         '71.0.3576.0',
1283         '70.0.3538.57',
1284         '70.0.3538.56',
1285         '71.0.3575.2',
1286         '70.0.3538.55',
1287         '69.0.3497.126',
1288         '70.0.3538.54',
1289         '71.0.3575.1',
1290         '71.0.3575.0',
1291         '71.0.3574.1',
1292         '71.0.3574.0',
1293         '70.0.3538.53',
1294         '69.0.3497.125',
1295         '70.0.3538.52',
1296         '71.0.3573.1',
1297         '71.0.3573.0',
1298         '70.0.3538.51',
1299         '69.0.3497.124',
1300         '71.0.3572.1',
1301         '71.0.3572.0',
1302         '70.0.3538.50',
1303         '69.0.3497.123',
1304         '71.0.3571.2',
1305         '70.0.3538.49',
1306         '69.0.3497.122',
1307         '71.0.3571.1',
1308         '71.0.3571.0',
1309         '70.0.3538.48',
1310         '69.0.3497.121',
1311         '71.0.3570.1',
1312         '71.0.3570.0',
1313         '70.0.3538.47',
1314         '69.0.3497.120',
1315         '71.0.3568.2',
1316         '71.0.3569.1',
1317         '71.0.3569.0',
1318         '70.0.3538.46',
1319         '69.0.3497.119',
1320         '70.0.3538.45',
1321         '71.0.3568.1',
1322         '71.0.3568.0',
1323         '70.0.3538.44',
1324         '69.0.3497.118',
1325         '70.0.3538.43',
1326         '70.0.3538.42',
1327         '71.0.3567.1',
1328         '71.0.3567.0',
1329         '70.0.3538.41',
1330         '69.0.3497.117',
1331         '71.0.3566.1',
1332         '71.0.3566.0',
1333         '70.0.3538.40',
1334         '69.0.3497.116',
1335         '71.0.3565.1',
1336         '71.0.3565.0',
1337         '70.0.3538.39',
1338         '69.0.3497.115',
1339         '71.0.3564.1',
1340         '71.0.3564.0',
1341         '70.0.3538.38',
1342         '69.0.3497.114',
1343         '71.0.3563.0',
1344         '71.0.3562.2',
1345         '70.0.3538.37',
1346         '69.0.3497.113',
1347         '70.0.3538.36',
1348         '70.0.3538.35',
1349         '71.0.3562.1',
1350         '71.0.3562.0',
1351         '70.0.3538.34',
1352         '69.0.3497.112',
1353         '70.0.3538.33',
1354         '71.0.3561.1',
1355         '71.0.3561.0',
1356         '70.0.3538.32',
1357         '69.0.3497.111',
1358         '71.0.3559.6',
1359         '71.0.3560.1',
1360         '71.0.3560.0',
1361         '71.0.3559.5',
1362         '71.0.3559.4',
1363         '70.0.3538.31',
1364         '69.0.3497.110',
1365         '71.0.3559.3',
1366         '70.0.3538.30',
1367         '69.0.3497.109',
1368         '71.0.3559.2',
1369         '71.0.3559.1',
1370         '71.0.3559.0',
1371         '70.0.3538.29',
1372         '69.0.3497.108',
1373         '71.0.3558.2',
1374         '71.0.3558.1',
1375         '71.0.3558.0',
1376         '70.0.3538.28',
1377         '69.0.3497.107',
1378         '71.0.3557.2',
1379         '71.0.3557.1',
1380         '71.0.3557.0',
1381         '70.0.3538.27',
1382         '69.0.3497.106',
1383         '71.0.3554.4',
1384         '70.0.3538.26',
1385         '71.0.3556.1',
1386         '71.0.3556.0',
1387         '70.0.3538.25',
1388         '71.0.3554.3',
1389         '69.0.3497.105',
1390         '71.0.3554.2',
1391         '70.0.3538.24',
1392         '69.0.3497.104',
1393         '71.0.3555.2',
1394         '70.0.3538.23',
1395         '71.0.3555.1',
1396         '71.0.3555.0',
1397         '70.0.3538.22',
1398         '69.0.3497.103',
1399         '71.0.3554.1',
1400         '71.0.3554.0',
1401         '70.0.3538.21',
1402         '69.0.3497.102',
1403         '71.0.3553.3',
1404         '70.0.3538.20',
1405         '69.0.3497.101',
1406         '71.0.3553.2',
1407         '69.0.3497.100',
1408         '71.0.3553.1',
1409         '71.0.3553.0',
1410         '70.0.3538.19',
1411         '69.0.3497.99',
1412         '69.0.3497.98',
1413         '69.0.3497.97',
1414         '71.0.3552.6',
1415         '71.0.3552.5',
1416         '71.0.3552.4',
1417         '71.0.3552.3',
1418         '71.0.3552.2',
1419         '71.0.3552.1',
1420         '71.0.3552.0',
1421         '70.0.3538.18',
1422         '69.0.3497.96',
1423         '71.0.3551.3',
1424         '71.0.3551.2',
1425         '71.0.3551.1',
1426         '71.0.3551.0',
1427         '70.0.3538.17',
1428         '69.0.3497.95',
1429         '71.0.3550.3',
1430         '71.0.3550.2',
1431         '71.0.3550.1',
1432         '71.0.3550.0',
1433         '70.0.3538.16',
1434         '69.0.3497.94',
1435         '71.0.3549.1',
1436         '71.0.3549.0',
1437         '70.0.3538.15',
1438         '69.0.3497.93',
1439         '69.0.3497.92',
1440         '71.0.3548.1',
1441         '71.0.3548.0',
1442         '70.0.3538.14',
1443         '69.0.3497.91',
1444         '71.0.3547.1',
1445         '71.0.3547.0',
1446         '70.0.3538.13',
1447         '69.0.3497.90',
1448         '71.0.3546.2',
1449         '69.0.3497.89',
1450         '71.0.3546.1',
1451         '71.0.3546.0',
1452         '70.0.3538.12',
1453         '69.0.3497.88',
1454         '71.0.3545.4',
1455         '71.0.3545.3',
1456         '71.0.3545.2',
1457         '71.0.3545.1',
1458         '71.0.3545.0',
1459         '70.0.3538.11',
1460         '69.0.3497.87',
1461         '71.0.3544.5',
1462         '71.0.3544.4',
1463         '71.0.3544.3',
1464         '71.0.3544.2',
1465         '71.0.3544.1',
1466         '71.0.3544.0',
1467         '69.0.3497.86',
1468         '70.0.3538.10',
1469         '69.0.3497.85',
1470         '70.0.3538.9',
1471         '69.0.3497.84',
1472         '71.0.3543.4',
1473         '70.0.3538.8',
1474         '71.0.3543.3',
1475         '71.0.3543.2',
1476         '71.0.3543.1',
1477         '71.0.3543.0',
1478         '70.0.3538.7',
1479         '69.0.3497.83',
1480         '71.0.3542.2',
1481         '71.0.3542.1',
1482         '71.0.3542.0',
1483         '70.0.3538.6',
1484         '69.0.3497.82',
1485         '69.0.3497.81',
1486         '71.0.3541.1',
1487         '71.0.3541.0',
1488         '70.0.3538.5',
1489         '69.0.3497.80',
1490         '71.0.3540.1',
1491         '71.0.3540.0',
1492         '70.0.3538.4',
1493         '69.0.3497.79',
1494         '70.0.3538.3',
1495         '71.0.3539.1',
1496         '71.0.3539.0',
1497         '69.0.3497.78',
1498         '68.0.3440.134',
1499         '69.0.3497.77',
1500         '70.0.3538.2',
1501         '70.0.3538.1',
1502         '70.0.3538.0',
1503         '69.0.3497.76',
1504         '68.0.3440.133',
1505         '69.0.3497.75',
1506         '70.0.3537.2',
1507         '70.0.3537.1',
1508         '70.0.3537.0',
1509         '69.0.3497.74',
1510         '68.0.3440.132',
1511         '70.0.3536.0',
1512         '70.0.3535.5',
1513         '70.0.3535.4',
1514         '70.0.3535.3',
1515         '69.0.3497.73',
1516         '68.0.3440.131',
1517         '70.0.3532.8',
1518         '70.0.3532.7',
1519         '69.0.3497.72',
1520         '69.0.3497.71',
1521         '70.0.3535.2',
1522         '70.0.3535.1',
1523         '70.0.3535.0',
1524         '69.0.3497.70',
1525         '68.0.3440.130',
1526         '69.0.3497.69',
1527         '68.0.3440.129',
1528         '70.0.3534.4',
1529         '70.0.3534.3',
1530         '70.0.3534.2',
1531         '70.0.3534.1',
1532         '70.0.3534.0',
1533         '69.0.3497.68',
1534         '68.0.3440.128',
1535         '70.0.3533.2',
1536         '70.0.3533.1',
1537         '70.0.3533.0',
1538         '69.0.3497.67',
1539         '68.0.3440.127',
1540         '70.0.3532.6',
1541         '70.0.3532.5',
1542         '70.0.3532.4',
1543         '69.0.3497.66',
1544         '68.0.3440.126',
1545         '70.0.3532.3',
1546         '70.0.3532.2',
1547         '70.0.3532.1',
1548         '69.0.3497.60',
1549         '69.0.3497.65',
1550         '69.0.3497.64',
1551         '70.0.3532.0',
1552         '70.0.3531.0',
1553         '70.0.3530.4',
1554         '70.0.3530.3',
1555         '70.0.3530.2',
1556         '69.0.3497.58',
1557         '68.0.3440.125',
1558         '69.0.3497.57',
1559         '69.0.3497.56',
1560         '69.0.3497.55',
1561         '69.0.3497.54',
1562         '70.0.3530.1',
1563         '70.0.3530.0',
1564         '69.0.3497.53',
1565         '68.0.3440.124',
1566         '69.0.3497.52',
1567         '70.0.3529.3',
1568         '70.0.3529.2',
1569         '70.0.3529.1',
1570         '70.0.3529.0',
1571         '69.0.3497.51',
1572         '70.0.3528.4',
1573         '68.0.3440.123',
1574         '70.0.3528.3',
1575         '70.0.3528.2',
1576         '70.0.3528.1',
1577         '70.0.3528.0',
1578         '69.0.3497.50',
1579         '68.0.3440.122',
1580         '70.0.3527.1',
1581         '70.0.3527.0',
1582         '69.0.3497.49',
1583         '68.0.3440.121',
1584         '70.0.3526.1',
1585         '70.0.3526.0',
1586         '68.0.3440.120',
1587         '69.0.3497.48',
1588         '69.0.3497.47',
1589         '68.0.3440.119',
1590         '68.0.3440.118',
1591         '70.0.3525.5',
1592         '70.0.3525.4',
1593         '70.0.3525.3',
1594         '68.0.3440.117',
1595         '69.0.3497.46',
1596         '70.0.3525.2',
1597         '70.0.3525.1',
1598         '70.0.3525.0',
1599         '69.0.3497.45',
1600         '68.0.3440.116',
1601         '70.0.3524.4',
1602         '70.0.3524.3',
1603         '69.0.3497.44',
1604         '70.0.3524.2',
1605         '70.0.3524.1',
1606         '70.0.3524.0',
1607         '70.0.3523.2',
1608         '69.0.3497.43',
1609         '68.0.3440.115',
1610         '70.0.3505.9',
1611         '69.0.3497.42',
1612         '70.0.3505.8',
1613         '70.0.3523.1',
1614         '70.0.3523.0',
1615         '69.0.3497.41',
1616         '68.0.3440.114',
1617         '70.0.3505.7',
1618         '69.0.3497.40',
1619         '70.0.3522.1',
1620         '70.0.3522.0',
1621         '70.0.3521.2',
1622         '69.0.3497.39',
1623         '68.0.3440.113',
1624         '70.0.3505.6',
1625         '70.0.3521.1',
1626         '70.0.3521.0',
1627         '69.0.3497.38',
1628         '68.0.3440.112',
1629         '70.0.3520.1',
1630         '70.0.3520.0',
1631         '69.0.3497.37',
1632         '68.0.3440.111',
1633         '70.0.3519.3',
1634         '70.0.3519.2',
1635         '70.0.3519.1',
1636         '70.0.3519.0',
1637         '69.0.3497.36',
1638         '68.0.3440.110',
1639         '70.0.3518.1',
1640         '70.0.3518.0',
1641         '69.0.3497.35',
1642         '69.0.3497.34',
1643         '68.0.3440.109',
1644         '70.0.3517.1',
1645         '70.0.3517.0',
1646         '69.0.3497.33',
1647         '68.0.3440.108',
1648         '69.0.3497.32',
1649         '70.0.3516.3',
1650         '70.0.3516.2',
1651         '70.0.3516.1',
1652         '70.0.3516.0',
1653         '69.0.3497.31',
1654         '68.0.3440.107',
1655         '70.0.3515.4',
1656         '68.0.3440.106',
1657         '70.0.3515.3',
1658         '70.0.3515.2',
1659         '70.0.3515.1',
1660         '70.0.3515.0',
1661         '69.0.3497.30',
1662         '68.0.3440.105',
1663         '68.0.3440.104',
1664         '70.0.3514.2',
1665         '70.0.3514.1',
1666         '70.0.3514.0',
1667         '69.0.3497.29',
1668         '68.0.3440.103',
1669         '70.0.3513.1',
1670         '70.0.3513.0',
1671         '69.0.3497.28',
1672     )
1673     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1674
1675
# Header name -> value mapping (presumably the defaults sent with HTTP
# requests; the consumers are outside this section).
# The User-Agent entry is computed once, when this module is imported, by
# calling random_user_agent(); it is not re-randomized afterwards.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1683
1684
# Fixed User-Agent strings, keyed by browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1688
1689
# Unique sentinel used as the default of `default` parameters (e.g. in the
# xpath_* helpers) so that "no default supplied" can be told apart from an
# explicitly passed None. Always compare with `is` / `is not`.
NO_DEFAULT = object()
1691
# Full English month names in calendar order (index 0 is January).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1695
# Month names per language code, each list in calendar order.
# The 'en' entry is the ENGLISH_MONTH_NAMES list itself, not a copy.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1702
# File extensions (without the leading dot) that are recognized as known.
# NOTE(review): the code consulting this tuple is outside this section;
# the entries look like media container/codec and manifest extensions.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1717
# Needed for sanitizing filenames in restricted mode: maps each accented
# character to its ASCII replacement. The two arguments of zip() are paired
# positionally, so the replacement sequence must stay in the same order as
# the accented characters; multi-letter replacements ('AE', 'OE', 'TH', 'ss',
# 'ae', 'oe', 'th') are wrapped in lists so that chain() yields them whole.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1722
# strptime-style format strings that are unambiguous about day/month order.
# The 'st'/'nd'/'rd'/'th' variants match English ordinal day suffixes.
# DATE_FORMATS_DAY_FIRST and DATE_FORMATS_MONTH_FIRST start from a copy of
# this tuple, so the order here is also the leading order of those lists.
# NOTE(review): presumably tried in order by the date-parsing helpers --
# confirm against callers before reordering.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)
1761
# All of DATE_FORMATS followed by the numeric layouts that are read with the
# day before the month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]
1771
# All of DATE_FORMATS followed by the numeric layouts that are read with the
# month before the day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1780
# Matches the argument tail `}('<payload>',<int>,<int>,'<words>'.split('|')`
# and captures the four arguments as groups 1-4.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script> element whose type is application/ld+json (optionally
# quoted; case-insensitive, '.' also matches newlines) and captures its body
# in the named group `json_ld`.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1783
1784
def preferredencoding():
    """Return the name of an encoding that is usable on this system.

    The value of locale.getpreferredencoding() is returned when it can
    actually be used to encode text; in every other case (lookup failure,
    unknown codec, ...) 'UTF-8' is returned instead.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable encoding name raises here
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
1798
1799
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The data is dumped into a temporary file created in fn's directory
    (fn's basename plus '.' as prefix, '.tmp' as suffix), which is then
    renamed to fn.  If anything fails, the temporary file is removed and
    the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        # Leaving the with block closes the temporary file before it is
        # chmod-ed and renamed
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # os.umask() can only be read by setting it, so set it to 0 and
            # restore it right away, then apply 0o666 filtered by that mask
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            # Best effort only: keep the temporary file's own permissions
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind on failure
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1858
1859
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (or xpath[@key] when val is None) """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (or xpath[@key] when val is None) """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1874
# On Python 2.6 the xml.etree.ElementTree.Element methods don't support
# the namespaces parameter
1877
1878
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{namespace-uri}tag' form.

    path is split on '/'; steps without a colon are kept as they are, and
    the prefix of every other step is looked up in ns_map.
    """
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(_expand(step) for step in path.split('/'))
1889
1890
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node that matches xpath.

    xpath   -- a single XPath string, or an iterable of XPath strings that
               are tried in order until one of them matches
    name    -- label used in the error message instead of xpath
    fatal   -- raise ExtractorError when nothing matches (only if no default
               was supplied)
    default -- value returned when nothing matches; takes precedence over
               fatal

    Returns None when nothing matches, fatal is false and no default was
    supplied.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty iterable of candidates is handled
        # as "not found" instead of raising UnboundLocalError below
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1912
1913
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    When no element is found, whatever xpath_element() produced (None or
    default) is passed through.  When the element has no text, default is
    returned if one was supplied, otherwise ExtractorError is raised if
    fatal is set, otherwise None is returned.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element

    text = element.text
    if text is not None:
        return text

    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % label)
    return None
1927
1928
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return the value of attribute key on the first element matching xpath.

    When no element carrying the attribute is found, default is returned if
    one was supplied, otherwise ExtractorError is raised if fatal is set,
    otherwise None is returned.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]

    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % label)
    return None
1940
1941
def get_element_by_id(id, html):
    """Return the content of the first tag whose id attribute equals id, or None if there is none"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
1945
1946
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class, or None if no tag matches"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1951
1952
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute value, or None if no tag matches"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1956
1957
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may list several names, so match class_name as a
    # whole word anywhere inside the (quote-free) attribute value
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
1963
1964
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute value in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case
    it is inserted into the pattern as a regular expression.
    """

    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(element_re, html):
        content = match.group('content')

        # Content starting with a quote character loses its first and last
        # character
        if content.startswith(('"', "'")):
            content = content[1:-1]

        contents.append(unescapeHTML(content))

    return contents
1988
1989
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element

    After markup has been fed, `attrs` holds the attributes of the most
    recently handled start tag as a dict; it is empty if no start tag was
    handled.
    """

    def __init__(self):
        # Result holder; set before the base initializer runs
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Start-tag hook: attrs is converted with dict(), replacing whatever
        # an earlier start tag stored
        self.attrs = dict(attrs)
1999
2000
def extract_attributes(html_element):
    """Decode the attributes of a single HTML element into a dictionary.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    the returned dictionary is
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was gathered up to that point is still returned
        pass
    return attr_parser.attrs
2025
2026
def clean_html(html):
    """Turn an HTML snippet into a readable plain-text string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newlines of the markup itself become spaces; line breaks in the result
    # come only from <br> tags and paragraph boundaries
    text = html.replace('\n', ' ')
    substitutions = (
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
2042
2043
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    '-' selects standard output (its binary buffer when available).  Any
    other name is opened as given; if that fails for a reason other than
    EACCES, the name is passed through sanitize_path() and, if that changed
    it, the open is retried with the new name.  Otherwise the original
    error propagates, like with the standard open() function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
2074
2075
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2083
2084
def sanitize_filename(s, restricted=False, is_id=False):
    """Make a string usable as part of a filename.

    restricted -- use a stricter subset of allowed characters (accented
                  characters are transliterated via ACCENT_CHARS, other
                  non-ASCII characters, whitespace and shell-special
                  characters become '_')
    is_id      -- the string is an ID rather than arbitrary text: only the
                  per-character replacement is applied, the cosmetic
                  clean-up afterwards is skipped
    """
    def _clean_char(ch):
        code = ord(ch)
        if restricted and ch in ACCENT_CHARS:
            return ACCENT_CHARS[ch]
        # Control characters, DEL and '?' are dropped altogether
        if ch == '?' or code < 32 or code == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted and (ch in '!&\'()[]{}$;`^,#' or ch.isspace() or code > 127):
            return '_'
        return ch

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    cleaned = ''.join(_clean_char(ch) for ch in s)
    if is_id:
        return cleaned

    # Collapse runs of underscores, then trim them from both ends
    cleaned = re.sub(r'_{2,}', '_', cleaned).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    if cleaned.startswith('-'):
        cleaned = '_' + cleaned[1:]
    cleaned = cleaned.lstrip('.')
    return cleaned or '_'
2124
2125
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On every other platform s is returned unchanged.  On Windows the drive
    or UNC prefix is kept, the remainder is normalized, and in each path
    component other than '.' and '..' the characters / < > : " | \\ ? * as
    well as a trailing whitespace character or dot are replaced by '#'.
    """
    if sys.platform != 'win32':
        return s

    drive_or_unc = os.path.splitdrive(s)[0]
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc = os.path.splitunc(s)[0]

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)

    cleaned_parts = []
    for part in parts:
        if part in ('.', '..'):
            cleaned_parts.append(part)
        else:
            cleaned_parts.append(re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part))

    if drive_or_unc:
        cleaned_parts.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*cleaned_parts)
2142
2143
def sanitize_url(url):
    """Repair a few well-known defects in a URL and return the result.

    Protocol-less URLs ('//host/path') get the `http:` scheme and a couple
    of frequently seen scheme typos are corrected; any other URL is
    returned unchanged.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        # All patterns are anchored at the start, so at most one
        # substitution can happen; the first typo that applies wins
        fixed_url, hits = re.subn(mistake, fixup, url)
        if hits:
            return fixed_url
    return url
2160
2161
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request for url after running it through sanitize_url().

    All further positional and keyword arguments are handed to the Request
    constructor as is.
    """
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2164
2165
def expand_path(s):
    """Expand ~ first and shell variables afterwards in the given path"""
    home_expanded = compat_expanduser(s)
    return os.path.expandvars(home_expanded)
2169
2170
def orderedSet(iterable):
    """ Return a list of the items of iterable without duplicates.

    The first occurrence of every item is kept, in its original position.
    Items are compared by equality only, so unhashable items work too.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
2178
2179
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    @param entity_with_semicolon  The entity without the leading '&' but
                                  including the trailing ';', e.g. 'amp;'
                                  or '#x2F;'
    @return The decoded text, or the literal '&...;' representation if the
            entity is unknown or its numeric value cannot be turned into a
            character
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        # Out-of-range code points raise ValueError, but numbers too large
        # for a C integer raise OverflowError instead; both mean the
        # reference is bogus and must be left as it is
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2209
2210
def unescapeHTML(s):
    """Replace every '&...;' entity in s via _htmlentity_transform().

    None is passed through unchanged; anything else must be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(mobj):
        # Group 1 is the entity without '&' but with the trailing ';'
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', decode_entity, s)
2218
2219
def process_communicate_or_kill(p, *args, **kwargs):
    """Call p.communicate(*args, **kwargs) and return its result.

    If communicate() is interrupted by any exception, the process is killed
    and waited for before the exception is re-raised.
    """
    try:
        result = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return result
2227
2228
def get_subprocess_encoding():
    """Return the name of the encoding used when talking to subprocesses.

    On Windows (major version 5 and up) this is preferredencoding(),
    elsewhere the filesystem encoding; 'utf-8' is the fallback when that
    lookup yields None.
    """
    on_modern_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    # For subprocess calls, encode with locale encoding
    # Refer to http://stackoverflow.com/a/9951851/35070
    encoding = preferredencoding() if on_modern_windows else sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2239
2240
def encodeFilename(s, for_subprocess=False):
    """
    @param s               The name of the file (must be a compat_str)
    @param for_subprocess  The result is meant for a subprocess call, so
                           the Unicode shortcut for Windows does not apply
    @return s itself wherever Unicode names can be used directly, otherwise
            s encoded with get_subprocess_encoding(), silently dropping
            characters that cannot be encoded
    """

    assert type(s) == compat_str

    unicode_is_fine = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if unicode_is_fine:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2263
2264
def decodeFilename(b, for_subprocess=False):
    """Turn a byte string filename into text on Python 2.

    On Python 3, and for anything that is not a byte string, b is returned
    as is. Otherwise it is decoded with get_subprocess_encoding(), dropping
    undecodable bytes. for_subprocess is accepted but not consulted.
    """
    needs_decoding = sys.version_info < (3, 0) and isinstance(b, bytes)
    if not needs_decoding:
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
2274
2275
def encodeArgument(s):
    """Encode a command line argument with encodeFilename() in subprocess mode.

    Byte strings are still tolerated and decoded as ASCII first.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # TODO: once all post processors are fixed, fail here instead:
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2283
2284
def decodeArgument(b):
    """Decode a command line argument: decodeFilename() in subprocess mode."""
    return decodeFilename(b, for_subprocess=True)
2287
2288
def decodeOption(optval):
    """Return an option value as text.

    None is passed through; byte strings are decoded with
    preferredencoding(). The result must be a compat_str.
    """
    if optval is None:
        return None
    text = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(text, compat_str)
    return text
2297
2298
def formatSeconds(secs, delim=':'):
    """Format a duration given in seconds as S, M:SS or H:MM:SS.

    @param secs   Number of seconds
    @param delim  Separator placed between the fields
    @return The formatted string; hours and minutes only show up once the
            duration reaches a full hour or minute, respectively
    """
    # The comparisons are inclusive so that exactly one hour reads 1:00:00
    # (not 60:00) and exactly one minute reads 1:00 (not 60)
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2306
2307
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler with an SSL setup the running Python supports.

    @param params  Options dict; a true 'nocheckcertificate' entry turns
                   certificate verification off. The dict itself is passed
                   on to the handler
    @param kwargs  Passed through to YoutubeDLHTTPSHandler
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2331
2332
def bug_reports_message():
    """Return the text appended to error messages that look like bugs.

    It asks the user to report the issue, to update first (the hint depends
    on ytdl_is_updateable()) and to attach the --verbose output.
    """
    if ytdl_is_updateable():
        update_cmd = 'type  youtube-dlc -U  to update'
    else:
        update_cmd = 'see  https://github.com/pukkandan/yt-dlp  on how to update'
    return ''.join((
        '; please report this issue on https://github.com/pukkandan/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.',
    ))
2342
2343
class YoutubeDLError(Exception):
    """Root of the YoutubeDL exception hierarchy.

    The other error classes of this module derive from it.
    """
2347
2348
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ Build the final error message and remember the error context.

        tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
        cause, if given, is mentioned in the message; video_id, if given, is prepended to it.
        """

        # Errors raised while handling one of these exceptions are never
        # treated as bugs, whatever the caller passed for expected
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback as a string, or None if there is none."""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
2376
2377
class UnsupportedError(ExtractorError):
    """Extraction error for an unsupported URL; always marked as expected."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2383
2384
class RegexNotFoundError(ExtractorError):
    """Extraction error signalling that a regular expression did not match."""
2388
2389
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    It is always marked as expected; the untouched message and the optional
    countries value are kept on the instance.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg, self.countries = msg, countries
2401
2402
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
2415
2416
class SameFileError(YoutubeDLError):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
2424
2425
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task. The message is also
    available as the msg attribute.
    """

    def __init__(self, msg):
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
2436
2437
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached.

    NOTE(review): the docstring used to describe the --max-downloads limit,
    which looks like a copy-paste slip. Presumably this is raised when
    processing should stop at a video that is already present - confirm
    against the code raising it.
    """
2441
2442
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached.

    NOTE(review): the docstring used to describe the --max-downloads limit,
    which looks like a copy-paste slip. Presumably this is raised when
    processing should stop at a video that got rejected (e.g. by a filter) -
    confirm against the code raising it.
    """
2446
2447
class MaxDownloadsReached(YoutubeDLError):
    """ Signals that the --max-downloads limit has been reached. """
2451
2452
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available for
    that video.
    """
2460
2461
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded, self.expected = downloaded, expected
2477
2478
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an errno-style code, a message and a derived reason.

    reason is 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED', worked out
    from the error code and from well-known fragments of the message.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in msg or 'Disk quota exceeded' in msg):
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
2493
2494
class XAttrUnavailableError(YoutubeDLError):
    """Extended attribute (xattr) support is unavailable.

    NOTE(review): description inferred from the class name; the code raising
    it lives outside this section - confirm.
    """
2497
2498
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if configured, bind it to a source address.

    @param ydl_handler  Handler whose _params dict is consulted for the
                        'source_address' option
    @param http_class   Connection class, instantiated with args and kwargs
    @param is_https     Whether the Python 2.6 fallback connect() has to
                        wrap the socket with SSL
    @return The connection object
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This works around _create_connection() from socket, which tries all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        # NOTE: the source_address parameter below shadows the outer option
        # value; it is indexed and passed to bind(), i.e. it is expected to
        # be an address tuple such as the (source_address, 0) built further
        # down.
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address means IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                # The host resolves, but not for the address family of the
                # requested source address
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try the remaining addresses in order; the first successful
            # connection wins, the last error is raised if all of them fail
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        # Only replace the connection factory on connection objects that
        # have one
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 in the address tuple handed to bind()
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address attribute: replace connect() on this
            # connection object with a version using the filtered factory
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2562
2563
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to headers.

    Without the marker the very same headers object is returned. With it, a
    new dict is returned that lacks both the marker and any Accept-Encoding
    header (matched case-insensitively); the input is left untouched.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    stripped = {}
    for name, value in headers.items():
        if name.lower() == 'accept-encoding':
            continue
        stripped[name] = value
    del stripped['Youtubedl-no-compression']
    return stripped
2572
2573
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    The request and response hooks are reused for HTTPS through the
    https_request/https_response aliases at the end of the class.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Keep params as self._params; args/kwargs go to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, going through a SOCKS proxy if the request asks for one.

        The proxy is requested with the internal 'Ytdl-socks-proxy' header,
        which is removed from the request here.
        """
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        # _create_http_connection receives this handler, the connection
        # class and is_https=False ahead of do_open()'s own arguments
        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, raw stream first, zlib-wrapped second."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Percent-encode the URL and add the standard headers to req.

        Returns the request to send, which is a new object if the URL had
        to be escaped.
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Headers already present on the request take precedence over
        # std_headers
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Honour and strip the internal Youtubedl-no-compression marker
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress the response body and percent-encode redirect targets.

        Returns resp itself or, if the body was gzip/deflate encoded, a
        replacement response wrapping the decompressed data.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1 to 1023 trailing bytes cut off and give up
                # with the original error if none of these attempts works
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            # The new response shares old_resp's headers object, so the
            # deflate check below no longer sees a Content-encoding header
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                # Only rewrite the header if escaping changed anything
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets exactly the same request/response treatment
    https_request = http_request
    https_response = http_response
2695
2696
def make_socks_conn_class(base_class, socks_proxy):
    """Create a subclass of base_class that connects through a SOCKS proxy.

    @param base_class   HTTPConnection or HTTPSConnection (or a subclass)
    @param socks_proxy  Proxy URL; the scheme selects the protocol
                        (socks5, socks4a, socks4 or plain socks, which
                        means SOCKS4), the port defaults to 1080 and an
                        optional user name and password are URL-unquoted
    @return The connection class
    @raise ValueError   if the scheme of socks_proxy is none of the above
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Fail with a clear message instead of leaving socks_type unbound,
        # which used to blow up below with an UnboundLocalError
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, TLS is set up on top of the proxied socket
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2738
2739
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler opening its connections via _create_http_connection().

    Connections are created from https_conn_class (HTTPSConnection unless
    specified) and may be routed through a SOCKS proxy on a per-request
    basis with the internal 'Ytdl-socks-proxy' header.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class

        # Forward whichever SSL settings the base handler stored on this
        # Python version: _context (python > 2.6), _check_hostname (python 3.x)
        open_kwargs = {}
        for attr, key in (('_context', 'context'), ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                open_kwargs[key] = getattr(self, attr)

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
2763
2764
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar reading and writing UTF-8 cookie files, tolerating
    malformed entries and storing session cookies with `expires` set to 0.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dlc.  Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        filename defaults to self.filename; ValueError is raised if neither
        is set. Cookies marked for discarding or already expired are left
        out unless ignore_discard / ignore_expires are set.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                expires = compat_str(cookie.expires) if cookie.expires is not None else ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                fields = (
                    cookie.domain, initial_dot, cookie.path,
                    secure, expires, name, value)
                f.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised if neither
        is set. Malformed entries are skipped with a warning on stderr.
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        def prepare_line(line):
            # Returns the line to load (without the HttpOnly prefix) or
            # raises LoadError if it is not a well-formed entry
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            fields = line.split('\t')
            if len(fields) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(fields))
            entry = self._CookieFileEntry(*fields)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % entry.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    prepared = prepare_line(line)
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                else:
                    cf.write(prepared)
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2880                 cookie.discard = True
2881
2882
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor that handles HTTPS the same way as HTTP.

    http_response currently just delegates to the base class (the Python 2
    workaround in it is disabled); https_request and https_response are
    aliases of the HTTP hooks.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is commented out, i.e. not active
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the HTTP hooks for HTTPS traffic
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2905
2906
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """HTTPRedirectHandler that coerces the redirect URL to text on Python 2.

    On Python 3 no method is overridden and the base class is used as is.
    """

    # The override is only defined when running under Python 2
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2914
2915
def extract_timezone(date_str):
    """Split a trailing UTC designator/offset off a date string.

    Recognizes a final ``Z`` or ``[+-]HH[:]MM`` (optionally preceded by one
    space) once at least eight other characters precede it.

    Returns a ``(timedelta, remaining_date_str)`` tuple. The timedelta is
    zero when no offset is found or when the designator is ``Z``; the string
    is returned untouched when nothing matched.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        return datetime.timedelta(), date_str

    # Drop the matched suffix from the date string
    remainder = date_str[:-len(tz_match.group('tz'))]
    sign_char = tz_match.group('sign')
    if not sign_char:
        # Plain 'Z' designator, i.e. UTC
        return datetime.timedelta(), remainder

    factor = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(tz_match.group('hours')),
        minutes=factor * int(tz_match.group('minutes')))
    return offset, remainder
2932
2933
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp from the given ISO 8601-like date string.

    Fractional seconds are discarded. When no `timezone` (a timedelta) is
    supplied, the UTC offset is taken from the string itself via
    extract_timezone(). Returns None for a None input or when the string
    does not match ``YYYY-MM-DD<delimiter>HH:MM:SS``.
    """
    if date_str is None:
        return None

    # strptime cannot cope with arbitrary-length fractions, so strip them
    without_fraction = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    try:
        layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_moment = datetime.datetime.strptime(without_fraction, layout) - timezone
        return calendar.timegm(utc_moment.timetuple())
    except ValueError:
        return None
2951
2952
def date_formats(day_first=True):
    """Return the strptime format list to try, day-first or month-first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2955
2956
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD.

    Returns None when `date_str` is None or cannot be parsed. Every known
    format is tried; when several match, the last matching one wins.
    """
    if date_str is None:
        return None

    # Commas become spaces, then AM/PM (+ timezone name) and any trailing
    # numeric UTC offset are stripped before matching the formats
    cleaned = date_str.replace(',', ' ')
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    result = None
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to RFC 2822 style parsing
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if result is None:
        return None
    return compat_str(result)
2983
2984
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date/time string.

    The string is normalized (commas/pipes removed, UTC offset extracted,
    AM/PM markers and unrecognized timezone names stripped, nanoseconds
    truncated) and then matched against the known date formats; RFC 2822
    style parsing is used as a fallback.

    Returns None when `date_str` is None or cannot be parsed.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # Must be detected before the AM/PM marker is stripped below
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # extract_timezone() has already removed the UTC offset from date_str,
        # so parsedate_tz() cannot see it (and calendar.timegm() ignores the
        # tuple's offset field anyway). Apply the extracted offset here, just
        # like the strptime path above does. The offset is a whole number of
        # minutes, so days/seconds cover it (total_seconds() needs Python 2.7).
        tz_seconds = timezone.days * 86400 + timezone.seconds
        return calendar.timegm(timetuple) + pm_delta * 3600 - tz_seconds
    return None
3016
3017
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL.

    The text after the last dot of the part before '?' is used when it is
    purely alphanumeric; otherwise it is accepted only if, minus trailing
    slashes, it is a known extension. Falls back to `default_ext`.
    """
    if url is None or '.' not in url:
        return default_ext

    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3029
3030
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by replacing the extension of `filename`
    with ``<sub_lang>.<sub_format>`` (see replace_extension)."""
    sub_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, sub_ext, expected_real_ext)
3033
3034
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD,
    'now', 'today', 'yesterday' or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days respectively.
    Raises ValueError when the string matches none of the above."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        # Not a relative expression: must be an absolute YYYYMMDD date
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount

    unit = relative.group('unit')
    # timedelta has no month/year support, so approximate them in days
    approx_days = {'month': 30, 'year': 365}
    if unit in approx_days:
        amount *= approx_days[unit]
        unit = 'day'

    return today + datetime.timedelta(**{unit + 's': amount})
3062
3063
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Strings that are not exactly eight digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
3072
3073
class DateRange(object):
    """Represents a time interval between two dates (both ends inclusive)"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; a missing bound defaults to the earliest/latest
        representable date. Raises ValueError if start is after end."""
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a date object or a date string) is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))
3103
3104
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Decode with the preferred encoding if a byte string was returned
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3113
3114
def _windows_write_string(s, out):
    """ Write `s` to a Windows console via the WriteConsoleW API.

    Returns True if the string was written using special methods,
    False if it has yet to be written out (i.e. `out` is not stdout/stderr
    attached to a real console and the caller must write it itself).
    Raises OSError if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps file descriptors to the identifiers passed to GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is treated as a console only if it is a character device
        # (ignoring the "remote" bit) and GetConsoleMode succeeds on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane,
        # or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, stopping each chunk before
    # a non-BMP character; such a character is then written on its own with a
    # length of 2
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever part was actually written
            s = s[written.value:]
    return True
3188
3189
def write_string(s, out=None, encoding=None):
    """Write the text string `s` to `out` (default: sys.stderr) and flush.

    On Windows consoles the native console API is tried first (only when no
    explicit `encoding` is requested). Otherwise the text is encoded, with
    unencodable characters dropped, for byte-oriented streams, or written
    as is for plain text streams.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    try_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if try_console_api and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text wrapper around a binary buffer: encode ourselves and bypass it
        target_encoding = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(target_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()
3210
3211
def bytes_to_intlist(bs):
    """Convert a byte string (or other sequence) to a list of integers.

    Empty input yields an empty list. If the first element is already an
    int (e.g. Python 3 bytes) the elements are copied as is; otherwise
    each element is converted with ord().
    """
    if not bs:
        return []
    if not isinstance(bs[0], int):
        return [ord(ch) for ch in bs]
    return list(bs)  # Python 3
3219
3220
def intlist_to_bytes(xs):
    """Pack a sequence of integers into a byte string, one unsigned byte each.

    Empty (or otherwise falsy) input yields b''.
    """
    if xs:
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
3225
3226
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) using whatever
# mechanism the current platform offers: LockFileEx/UnlockFileEx on Windows,
# fcntl.flock elsewhere, or stubs that raise IOError when neither exists.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        """ctypes layout of the OVERLAPPED argument of (Un)LockFileEx."""
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high parts of the byte count to lock, starting at offset 0
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock file object `f`; raises OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so that _unlock_file() can pass the very
        # same structure to UnlockFileEx
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 is LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file(); raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock() lock on file object `f`."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock() lock held on file object `f`."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            """Always raises IOError: no locking mechanism is available."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always raises IOError: no locking mechanism is available."""
            raise IOError(UNSUPPORTED_MSG)
3300
3301
class locked_file(object):
    """File object wrapper that holds a file lock while used in a `with` block.

    The file is opened on construction. Entering the context takes a shared
    lock for mode 'r' and an exclusive lock for 'a'/'w'; leaving it releases
    the lock and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except (IOError, OSError):
            # The Windows locking backend reports failures as OSError, which
            # is not an IOError on Python 2; close the handle in either case
            # so that a failed lock does not leak the open file.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Always close the file, even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3331
3332
def get_filesystem_encoding():
    """Return the filesystem encoding, falling back to 'utf-8' if unknown."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3336
3337
def shell_quote(args):
    """Return the arguments as one shell-quoted, space-separated string.

    Byte string arguments are decoded with the filesystem encoding first.
    """
    fs_encoding = get_filesystem_encoding()

    def _as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(_as_text(arg)) for arg in args)
3347
3348
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    `data` (a JSON-serializable dict) is encoded into the URL fragment. If
    the URL already carries smuggled data, both are merged, with the values
    already present in the URL taking precedence. The `data` dict passed in
    is not modified.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so that the caller's dict is left untouched
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
3357
3358
def unsmuggle_url(smug_url, default=None):
    """Split a URL produced by smuggle_url() into ``(url, data)``.

    URLs without smuggled data are returned unchanged together with
    `default`.
    """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, fragment = smug_url.rpartition('#')
        payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return url, json.loads(payload)
    return smug_url, default
3366
3367
def format_bytes(bytes):
    """Format a byte count as a human-readable string with a binary suffix.

    `bytes` may be a number, a numeric str, or None (rendered as 'N/A').
    The value is scaled by powers of 1024 and shown with two decimals,
    e.g. 1536 -> '1.50KiB'.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)

    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the exponent inside the suffix table: without this, values
        # below 1/1024 would index from the end of the list (yielding 'YiB')
        # and values of 1024**9 or more would raise IndexError.
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3380
3381
def lookup_unit_table(unit_table, s):
    """Parse ``<number><unit>`` at the start of `s` using `unit_table`.

    `unit_table` maps unit strings to multipliers. The number may use ','
    or '.' as the decimal separator. Returns the product as an int, or None
    when `s` does not start with a number followed by a known unit.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    found = re.match(pattern, s)
    if found is None:
        return None
    quantity = float(found.group('num').replace(',', '.'))
    return int(quantity * unit_table[found.group('unit')])
3391
3392
def parse_filesize(s):
    """Parse a human-readable file size such as '1.5 MiB' into bytes.

    Returns an int, or None if `s` is None or does not start with a number
    followed by one of the units in the table below (see lookup_unit_table).
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    # Unit -> multiplier in bytes; note that each spelling is mapped
    # individually to either a power of 1000 or a power of 1024
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3462
3463
def parse_count(s):
    """Parse a count such as '1,234', '1.5k' or '3M' into an int.

    Returns None if `s` is None. Strings made only of digits, commas and
    dots are handed to str_to_int; anything else is looked up with the
    k/m/kk multiplier suffixes (see lookup_unit_table).
    """
    if s is None:
        return None

    text = s.strip()

    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, text)
3483
3484
def parse_resolution(s):
    """Extract resolution information from a string.

    Returns a dict with 'width' and 'height' for ``WxH`` patterns, with
    only 'height' for ``<N>p``/``<N>i`` or ``4k``/``8k`` patterns, and an
    empty dict if `s` is None or nothing is recognized.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        return {
            'width': int(dimensions.group('w')),
            'height': int(dimensions.group('h')),
        }

    scan_lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    # 4k -> 2160, 8k -> 4320
    k_class = re.search(r'\b([48])[kK]\b', s)
    if k_class:
        return {'height': int(k_class.group(1)) * 540}

    return {}
3505
3506
def parse_bitrate(s):
    """Return the integer found in front of 'kbps' in `s`.

    Returns None when `s` is not a text string or contains no such value.
    """
    if not isinstance(s, compat_str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    return int(found.group(1)) if found else None
3513
3514
def month_by_name(name, lang='en'):
    """ Return the number (1-12) of a month by its full name in the given
    language, independently of the locale; languages missing from
    MONTH_NAMES fall back to English. Returns None for unknown names. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        position = names.index(name)
    except ValueError:
        return None
    return position + 1
3524
3525
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of a month by (locale-independently) English
        three-letter abbreviation, or None if it is not recognized """

    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    try:
        position = abbreviations.index(abbrev)
    except ValueError:
        return None
    return position + 1
3534
3535
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML.

    Ampersands that already start one of the predefined entities (amp, lt,
    gt, apos, quot) or a numeric character reference are left alone.
    """
    # Numeric references may need up to 6 hex digits (U+10FFFF) or 7 decimal
    # digits (1114111); a shorter limit would double-escape references to
    # characters outside the Basic Multilingual Plane such as &#x1F600;.
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,6};|#[0-9]{,7};)',
        '&amp;',
        xml_str)
3542
3543
def setproctitle(title):
    """Set the process name via libc's prctl(), on a best-effort basis.

    Silently does nothing on Jython, when 'libc.so.6' cannot be loaded, or
    when the loaded libc has no prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # One extra byte so that the buffer stays NUL-terminated: prctl() reads a
    # C string, and a buffer of exactly len(title_bytes) bytes would have no
    # terminator once the value is assigned.
    buf = ctypes.create_string_buffer(len(title_bytes) + 1)
    buf.value = title_bytes
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 == PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
3568
3569
def remove_start(s, start):
    """Return `s` without the prefix `start`; `s` is returned unchanged if it
    is None or does not start with `start`."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3572
3573
def remove_end(s, end):
    """Return `s` without the suffix `end`; `s` is returned unchanged if it
    is None or does not end with `end`."""
    if s is not None and s.endswith(end):
        # Slice by absolute position: s[:-len(end)] would be s[:0] (an empty
        # string) for an empty suffix
        return s[:len(s) - len(end)]
    return s
3576
3577
def remove_quotes(s):
    """Strip one pair of matching surrounding single or double quotes.

    Values that are None, shorter than two characters, or not enclosed in
    the same quote character on both ends are returned unchanged.
    """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3585
3586
def get_domain(url):
    """Return the host part of a URL without scheme and leading 'www.'.

    Returns None when the text does not look like ``host.tld[/...]``.
    """
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
3590
3591
def url_basename(url):
    """Return the last segment of the URL's path, ignoring leading and
    trailing slashes."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
3595
3596
def base_url(url):
    """Return the http(s) URL up to and including the last '/' that comes
    before any '?', '#' or '&'.

    Raises AttributeError if `url` does not match that shape."""
    matched = re.match(r'https?://[^?#&]+/', url)
    return matched.group()
3599
3600
def urljoin(base, path):
    """Join `path` onto `base`, tolerating bad input.

    Byte strings are decoded as UTF-8. Returns None if `path` is empty or
    not a string; returns `path` itself if it is already absolute or
    protocol-relative; returns None if `base` is not an http(s) or
    protocol-relative URL.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    # Already has a scheme and/or starts with '//': nothing to join
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    base_is_usable = isinstance(base, compat_str) and re.match(
        r'^(?:https?:)?//', base)
    if not base_is_usable:
        return None
    return compat_urlparse.urljoin(base, path)
3614
3615
class HEADRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always 'HEAD'."""

    def get_method(self):
        return 'HEAD'
3619
3620
class PUTRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always 'PUT'."""

    def get_method(self):
        return 'PUT'
3624
3625
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert `v` to an int, returning `default` when that is not possible.

    If `get_attr` is given, the attribute of that name is read from `v`
    first. The result is ``int(v) * invscale // scale``. None, the empty
    string and values that cannot be converted (including infinities and
    NaN) yield `default`.
    """
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float
        return default
3638
3639
def str_or_none(v, default=None):
    """Return `v` converted to a text string, or `default` if `v` is None."""
    if v is None:
        return default
    return compat_str(v)
3642
3643
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned as is; text strings have ',', '.' and '+'
    removed before conversion. Any other type yields None. """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits)
    return None
3651
3652
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return ``float(v) * invscale / scale``, or `default` when `v` is None
    or the computation fails with a ValueError/TypeError."""
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
3660
3661
def bool_or_none(v, default=None):
    """Return `v` if it is an actual bool, otherwise `default`."""
    if isinstance(v, bool):
        return v
    return default
3664
3665
def strip_or_none(v, default=None):
    """Return `v` stripped of surrounding whitespace if it is a text string,
    otherwise `default`."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3668
3669
def url_or_none(url):
    """Return the stripped `url` if it looks like a URL, otherwise None.

    Accepted are protocol-relative ('//...') URLs and the http(s), rtmp*,
    rtmfp, rtsp*, mms and ftp(s) schemes matched by the pattern below.
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
3675
3676
def strftime_or_none(timestamp, date_format, default=None):
    """Format a UNIX timestamp or a YYYYMMDD string with `date_format`.

    Numeric timestamps are interpreted as UTC. Returns `default` if
    `timestamp` is of any other type, cannot be parsed, or is outside the
    range the platform can represent.
    """
    datetime_object = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        # For unsupported types datetime_object is still None here and the
        # resulting AttributeError is turned into `default` below
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        # utcfromtimestamp() raises OverflowError/OSError for timestamps that
        # are out of range for the platform
        return default
3687
3688
def parse_duration(s):
    """Parse a duration string and return its length in seconds.

    Three notations are tried in order:
      1. colon-separated ``[[[DD:]HH:]MM:]SS[.fraction]``
      2. ISO 8601-like / textual components such as 'PT1H30M', '1h 30m 5s'
         or '2 days 3 hours'
      3. a single (possibly fractional) amount of hours or minutes such as
         '1.5 hours' or '3 min'
    Each may carry a trailing 'Z'.

    Returns None if `s` is not a string or matches none of the notations.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # 1. Colon-separated notation; only the seconds part is mandatory
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # 2. Component notation. Years, months and weeks are matched but not
        # captured, so they do not contribute to the result. Every component
        # is optional.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # 3. A lone amount of hours or minutes, which may be fractional
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum up whichever components were captured (unmatched groups are None)
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms holds the fractional part including its leading dot, e.g. '.5'
        duration += float(ms)
    return duration
3745
3746
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` in front of the real extension of `filename`.

    If `expected_real_ext` is given and the real extension differs from it,
    `ext` is appended to the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(stem, ext, real_ext)
3753
3754
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the real extension of `filename` with `ext`.

    If `expected_real_ext` is given and the real extension differs from it,
    `ext` is appended to the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        base = filename
    else:
        base = stem
    return '{0}.{1}'.format(base, ext)
3760
3761
def check_executable(exe, args=[]):
    """ Checks whether the given binary can be started, and returns its name
    (or False when launching it raises OSError).
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
3771
3772
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.
    The version is extracted from the program's output by
    detect_exe_version """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dlc is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3790
3791
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from a program's textual output.

    `version_re` must contain one capturing group; by default the token
    following the word 'version' is used. Returns `unrecognized` when the
    pattern is not found.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
3801
3802
class PagedList(object):
    """Base class for sequences whose entries are fetched page by page.

    Subclasses implement getslice(start=0, end=None), which returns the
    entries in the half-open index range [start, end) as a list.
    """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())


class OnDemandPagedList(PagedList):
    """PagedList that requests pages one by one until a short page is seen.

    pagefunc(pagenum) must return an iterable with the entries of the
    0-based page pagenum; pagesize is the number of entries of a full page.
    With use_cache enabled every fetched page is remembered, so pagefunc is
    called at most once per page.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the entries with index in [start, end) (to the end if end is None)."""
        # Like list slicing, an empty or reversed range selects nothing.
        # Without this guard a page is fetched anyway and, e.g. for
        # start == end on a page boundary, returned in full.
        if end is not None and end <= start:
            return []

        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
                if self._use_cache:
                    # The complete page is cached, before any slicing below
                    self._cache[pagenum] = page_results

            # Offset of the first wanted entry inside this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry, if the range ends here
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res


class InAdvancePagedList(PagedList):
    """PagedList for sources whose number of pages is known beforehand.

    pagefunc(pagenum) must return an iterable with the entries of the
    0-based page pagenum; pagecount is the total number of pages and
    pagesize the number of entries of a full page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with index in [start, end) (to the end if end is None)."""
        # Like list slicing, an empty or reversed range selects nothing;
        # a negative remaining count would otherwise be used as a slice end
        if end is not None and end <= start:
            return []

        res = []
        start_page = start // self._pagesize
        # Never ask for pages beyond the advertised page count, however
        # large the requested end index is
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first fetched page has leading entries to drop
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3888
3889
def uppercase_escape(s):
    """Replace every literal \\UXXXXXXXX escape (8 hex digits) in s by the character it denotes."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        # The decoder returns (text, consumed length); only the text is needed
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
3896
3897
def lowercase_escape(s):
    """Replace every literal \\uXXXX escape (4 hex digits) in s by the character it denotes."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        # The decoder returns (text, consumed length); only the text is needed
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
3904
3905
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        # On Python 2, unicode input is turned into UTF-8 bytes before quoting
        s = s.encode('utf-8')
    # Second argument: characters that are left unquoted
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3911
3912
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host part goes through the IDNA codec; all other components are
    # percent-escaped by escape_rfc3986()
    ascii_netloc = parts.netloc.encode('idna').decode('ascii')
    escaped_parts = parts._replace(
        netloc=ascii_netloc,
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped_parts.geturl()
3923
3924
def read_batch_urls(batch_fd):
    """Read the URLs listed in batch_fd, one per line, and close it.

    Lines that are empty or start with '#', ';' or ']' are dropped. A byte
    order mark and leading whitespace are removed, as is a trailing comment
    introduced by whitespace followed by '#'.
    """
    BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        without_comment = re.split(r'\s#', url, 1)[0]
        return without_comment.rstrip()

    urls = []
    with contextlib.closing(batch_fd) as fd:
        for line in fd:
            cleaned = fixup(line)
            if cleaned:
                urls.append(cleaned)
    return urls
3942
3943
def urlencode_postdata(*args, **kargs):
    """URL-encode the given arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3946
3947
def update_url_query(url, query):
    """Return url with the parameters of the query dict merged into its query string.

    Parameters already present in url are overridden by those in query.
    A falsy query leaves url untouched.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parts.query)
    merged.update(query)
    # doseq=True: list values (as produced by parse_qs) become repeated keys
    new_query = compat_urllib_parse_urlencode(merged, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
3956
3957
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with some parts replaced.

    headers are merged over those of req, data replaces req.data when truthy,
    url replaces the original URL when truthy, and query is merged into the
    URL's query string. HEAD and PUT requests keep their method through
    HEADRequest / PUTRequest; a timeout attribute on req is carried over.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    payload = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)

    method = req.get_method()
    if method == 'HEAD':
        request_class = HEADRequest
    elif method == 'PUT':
        request_class = PUTRequest
    else:
        request_class = compat_urllib_request.Request

    updated = request_class(
        target_url, data=payload, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        updated.timeout = req.timeout
    return updated
3976
3977
3978 def _multipart_encode_impl(data, boundary):
3979     content_type = 'multipart/form-data; boundary=%s' % boundary
3980
3981     out = b''
3982     for k, v in data.items():
3983         out += b'--' + boundary.encode('ascii') + b'\r\n'
3984         if isinstance(k, compat_str):
3985             k = k.encode('utf-8')
3986         if isinstance(v, compat_str):
3987             v = v.encode('utf-8')
3988         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3989         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3990         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3991         if boundary.encode('ascii') in content:
3992             raise ValueError('Boundary overlaps with data')
3993         out += content
3994
3995     out += b'--' + boundary.encode('ascii') + b'--\r\n'
3996
3997     return out, content_type
3998
3999
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (encoded body, Content-Type header value). With an
    explicit boundary, ValueError is raised if it occurs in the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # Caller-chosen boundary: a clash with the data is reported as is
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # Random boundary was rejected; draw another one
            continue
4028
4029
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key of a list/tuple, in d.

    For a list or tuple of keys, the value of the first key that is present
    and not None (and, with skip_false_values, not falsy) is returned, else
    default. A single key is a plain d.get(key, default).
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for candidate in key_or_keys:
        if candidate not in d:
            continue
        value = d[candidate]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4038
4039
def try_get(src, getter, expected_type=None):
    """Apply getter (a callable, or a list/tuple of callables) to src.

    Returns the first result obtained without AttributeError, KeyError,
    TypeError or IndexError that is an instance of expected_type (any value
    when expected_type is None); None if no getter qualifies.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for extract in getters:
        try:
            value = extract(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
4051
4052
def merge_dicts(*dicts):
    """Merge dicts, giving precedence to earlier ones.

    None values are ignored. A key already merged is only overwritten when
    its current value is an empty string and the new value is a non-empty
    string.
    """
    merged = {}
    for a_dict in dicts:
        for key, value in a_dict.items():
            if value is None:
                continue
            if key in merged:
                previous = merged[key]
                fills_blank = (
                    isinstance(value, compat_str) and value
                    and isinstance(previous, compat_str) and not previous)
                if not fills_blank:
                    continue
            merged[key] = value
    return merged
4065
4066
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as compat_str, decoding it with encoding/errors if it is not one already."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4069
4070
# Age limit that parse_age_limit() returns for each US rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4078
4079
# Age limit that parse_age_limit() returns for each TV rating label.
# Every key must start with the 3-character prefix 'TV-': parse_age_limit()
# builds its pattern from k[3:] and looks matches up again as 'TV-' + suffix.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4088
4089
def parse_age_limit(s):
    """Convert an age rating to a numeric age limit, or None if unrecognized.

    Accepts an int between 0 and 21, a string of one or two digits with an
    optional trailing '+', a key of US_RATINGS, or a TV rating such as
    'TV-14', 'TV_14' or 'TV14'.
    """
    # Exact type check: subclasses of int such as bool are not accepted here
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
4104
4105
def strip_jsonp(code):
    """Strip a JSONP wrapper such as `callback(...);` and return the wrapped data.

    Input that does not look like a JSONP call is returned unchanged.
    """
    jsonp_re = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return jsonp_re.sub(r'\g<callback_data>', code)
4114
4115
def js_to_json(code, vars={}):
    """Convert a JavaScript object/array literal into JSON text.

    Quotes keys and bare identifiers, normalises string quoting, converts
    hexadecimal and octal integers to decimal, and removes comments, "!"
    runs and trailing commas.

    vars is a dict of var, val pairs to substitute: an identifier found in
    vars is replaced by its value verbatim.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Rewrites applied to escapes (and bare double quotes) inside string
    # literals; anything not listed is kept as is
    STRING_ESCAPES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token.startswith(('/*', '//', '!')) or token == ',':
            # Comments, "!" runs and trailing commas are removed
            return ""

        if token[0] in ("'", '"'):
            unquoted = re.sub(
                r'(?s)\\.|"',
                lambda esc: STRING_ESCAPES.get(esc.group(0), esc.group(0)),
                token[1:-1])
            return '"%s"' % unquoted

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, token)
            if im:
                i = int(im.group(1), base)
                # A trailing colon means the number is used as an object key
                return '"%d":' % i if token.endswith(':') else '%d' % i

        if token in vars:
            return vars[token]

        return '"%s"' % token

    token_re = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)
    return re.sub(token_re, fix_kv, code)
4160
4161
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    Returns a function mapping a quality id to its position in quality_ids,
    or to -1 if it is not listed. """
    def q(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return q
4170
4171
# Template built from %(field)s placeholders (title, id, ext).
# NOTE(review): presumably the fallback output filename template; its users
# are outside this part of the file - confirm.
DEFAULT_OUTTMPL = '%(title)s [%(id)s].%(ext)s'
4173
4174
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than length are cut so that, including the trailing
    '...', they are length characters long. None is passed through. """
    ELLIPSES = '...'
    if s is None or len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
4183
4184
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints (ValueError on non-numeric parts)."""
    return tuple(map(int, re.split(r'[-.]', v)))
4187
4188
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is older than limit.

    If version is empty or either version cannot be parsed into numbers,
    the answer is decided by assume_new: not outdated when it is true.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        current, minimum = version_tuple(version), version_tuple(limit)
    except ValueError:
        return fallback
    return current < minimum
4196
4197
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U """
    # Always False. The zipimporter / sys.frozen detection that used to
    # follow this return statement could never run and has been removed.
    return False
4205
4206
def args_to_str(args):
    """Get a short string representation for a subprocess command."""
    quoted = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
4210
4211
def error_to_compat_str(err):
    """Return the message of err as text."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4219
4220
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    Parameters after ';' are ignored and matching is case-insensitive.
    A few complete types have dedicated extensions; otherwise the subtype
    is translated through a table and, if unknown, returned as is.
    Returns None when mt is None.
    """
    if mt is None:
        return None

    # Strip parameters ("; codecs=...", "; charset=...") and fold case before
    # any lookup, so that e.g. 'audio/mp4; codecs="mp4a.40.2"' still maps to
    # m4a and a '/' inside a parameter cannot be mistaken for the separator
    mime_type = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mime_type)
    if ext is not None:
        return ext

    # Subtype: whatever follows the last '/', or the whole value without one
    res = mime_type.rpartition('/')[2].strip()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4258
4259
def parse_codecs(codecs_str):
    """Split a comma-separated codecs string into video and audio codec.

    Returns {} for empty input, otherwise a dict with 'vcodec' and 'acodec'
    ('none' for a missing kind). If no codec is recognized but exactly two
    are listed, they are taken as video and audio in that order. Unknown
    codecs trigger a warning on stderr.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    split_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        family = full_codec.split('.')[0]
        if family in VIDEO_CODECS:
            # Only the first codec of each kind is kept
            vcodec = vcodec or full_codec
        elif family in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4289
4290
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the headers of url_handle.

    The quoted filename of an attachment Content-Disposition header is tried
    first; otherwise the Content-Type header is mapped with mimetype2ext().
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        found = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if found:
            ext = determine_ext(found.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4303
4304
def encode_data_uri(data, mime_type):
    """Return a base64 data: URI carrying the bytes data with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4307
4308
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked when no limit is set (age_limit is None) or when
    # the content is available for everyone (content_limit is None)
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4317
4318
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Returns a match object (truthy) if the decoded text starts with optional
    whitespace followed by '<', otherwise None. """

    # The UTF-32 BOMs come before the UTF-16 ones, whose bytes they start with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Without a BOM the data is taken to be UTF-8
    encoding, offset = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, offset = enc, len(bom)
            break
    text = first_bytes[offset:].decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
4337
4338
def determine_protocol(info_dict):
    """Return the protocol of a format dict.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from the 'url' entry: its rtmp/mms/rtsp prefix, then an m3u8 or f4m
    extension, and finally the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext == 'm3u8' or ext == 'f4m':
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4359
4360
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    Columns are left-aligned and padded to the widest cell plus extraGap;
    the last column is not padded. delim adds a row of dashes below the
    header; hideEmpty drops columns whose data cells are all empty. """

    def column_widths(rows):
        return [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]

    def keep_columns(row, mask):
        return [cell for (keep, cell) in zip(mask, row) if keep]

    if hideEmpty:
        # A width of 0 means every data cell of that column is empty
        data_widths = column_widths(data)
        header_row = keep_columns(header_row, data_widths)
        data = [keep_columns(row, data_widths) for row in data]

    widths = column_widths([header_row] + data)
    rows = [header_row]
    if delim:
        rows.append(['-' * width for width in widths])
    rows.extend(data)

    padded = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded) + ' %s'
    return '\n'.join(row_format % tuple(row) for row in rows)
4381
4382
4383 def _match_one(filter_part, dct):
4384     COMPARISON_OPERATORS = {
4385         '<': operator.lt,
4386         '<=': operator.le,
4387         '>': operator.gt,
4388         '>=': operator.ge,
4389         '=': operator.eq,
4390         '!=': operator.ne,
4391     }
4392     operator_rex = re.compile(r'''(?x)\s*
4393         (?P<key>[a-z_]+)
4394         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4395         (?:
4396             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4397             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4398             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4399         )
4400         \s*$
4401         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4402     m = operator_rex.search(filter_part)
4403     if m:
4404         op = COMPARISON_OPERATORS[m.group('op')]
4405         actual_value = dct.get(m.group('key'))
4406         if (m.group('quotedstrval') is not None
4407             or m.group('strval') is not None
4408             # If the original field is a string and matching comparisonvalue is
4409             # a number we should respect the origin of the original field
4410             # and process comparison value as a string (see
4411             # https://github.com/ytdl-org/youtube-dl/issues/11082).
4412             or actual_value is not None and m.group('intval') is not None
4413                 and isinstance(actual_value, compat_str)):
4414             if m.group('op') not in ('=', '!='):
4415                 raise ValueError(
4416                     'Operator %s does not support string values!' % m.group('op'))
4417             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4418             quote = m.group('quote')
4419             if quote is not None:
4420                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4421         else:
4422             try:
4423                 comparison_value = int(m.group('intval'))
4424             except ValueError:
4425                 comparison_value = parse_filesize(m.group('intval'))
4426                 if comparison_value is None:
4427                     comparison_value = parse_filesize(m.group('intval') + 'B')
4428                 if comparison_value is None:
4429                     raise ValueError(
4430                         'Invalid integer value %r in filter part %r' % (
4431                             m.group('intval'), filter_part))
4432         if actual_value is None:
4433             return m.group('none_inclusive')
4434         return op(actual_value, comparison_value)
4435
4436     UNARY_OPERATORS = {
4437         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4438         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4439     }
4440     operator_rex = re.compile(r'''(?x)\s*
4441         (?P<op>%s)\s*(?P<key>[a-z_]+)
4442         \s*$
4443         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4444     m = operator_rex.search(filter_part)
4445     if m:
4446         op = UNARY_OPERATORS[m.group('op')]
4447         actual_value = dct.get(m.group('key'))
4448         return op(actual_value)
4449
4450     raise ValueError('Invalid filter part %r' % filter_part)
4451
4452
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false

    filter_str consists of '&'-separated parts; every part must match. """

    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4458
4459
def match_filter_func(filter_str):
    """Create a match filter callable from filter_str.

    The returned function takes an info dict and returns None if it passes
    the filter, otherwise a message explaining why the video is skipped.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
4468
4469
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP time expression into seconds (float).

    Accepts an offset such as '12', '1.5' or '1.5s', or a clock value
    'H:MM:SS' with an optional fraction separated by '.' or ':'.
    Returns None for empty or unrecognized input.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
    return None
4481
4482
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode 'HH:MM:SS,mmm'."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    # %d drops the fractional part of each component
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4485
4486
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no paragraph (p) element
    '''
    # (current namespace, [legacy namespaces rewritten to it before parsing])
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # tts:* attributes that are translated to SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> resolved {property: value}, inherited properties included
    styles = {}
    # style applied to every paragraph, collected from body/div below
    default_style = {}

    class TTMLPElementParser(object):
        """XMLParser target rendering one paragraph as SRT text with markup."""
        _out = ''
        # NOTE(review): these two lists are class attributes, so all parser
        # instances created by parse_node() share them; entries left in
        # _applied_styles by one paragraph are seen by the next one. Kept as
        # is because changing it would change the produced markup.
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Precedence, lowest first: default style, referenced style,
                # inline tts:* attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the most
                        # recently applied style
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened for this element, innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element and feed it back through a parser whose
        # target builds the SRT text
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style can only be resolved once its parent
    # is, and parents may be declared after their children, so the list is
    # scanned repeatedly.
    style_elements = dfxp.findall(_x('.//ttml:style'))
    while True:
        repeat = False
        resolved_before = len(styles)
        for style in style_elements:
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            else:
                # Register parentless styles even when they carry no
                # supported property, so that styles inheriting from them
                # can still be resolved
                styles.setdefault(style_id, {})
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Another pass can only help if this one resolved a new style.
        # Without this check a reference to an undefined style, or a cycle
        # of references, would keep the loop running forever.
        if not repeat or len(styles) == resolved_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # Cue numbers follow the position among all paragraphs, so a skipped
    # paragraph leaves a gap in the numbering
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # No usable end: derive it from the duration, if there is one
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4649
4650
def cli_option(params, command_option, param):
    """Return [command_option, value] for params[param], or [] if it is unset (None).

    Truthy values are converted to compat_str; falsy non-None values are
    passed on unchanged.
    """
    value = params.get(param)
    if value is None:
        return []
    if value:
        value = compat_str(value)
    return [command_option, value]
4656
4657
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render the boolean params[param] as command line arguments.

    Returns [] when the parameter is unset (None). Otherwise returns
    [command_option, value], or the single argument
    command_option + separator + value when a separator is given, where
    value is true_value or false_value.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4666
4667
def cli_valueless_option(params, command_option, param, expected_value=True):
    """
    Return [command_option] when the parameter equals expected_value,
    otherwise an empty list
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4671
4672
def cli_configuration_args(params, arg_name, key, default=[], exe=None):  # returns arg, for_compat
    """
    Select the extra command line arguments configured for a key/executable

    params[arg_name] may be:
      * a list or tuple (old format): returned unchanged, for_compat is True
      * None: `default` is returned as-is (not copied)
      * a dict mapping lower-case names to argument lists, searched as
        follows (key and exe are lower-cased first):
          1. '<key>+<exe>' if exe is given
          2. otherwise the arguments for key (skipped when key == exe),
             followed by the arguments for exe, if any
          3. if neither key nor exe has an entry, the 'default' entry,
             falling back to `default`

    @returns  tuple (arguments, for_compat)
    """
    argdict = params.get(arg_name, {})
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        return argdict, True

    if argdict is None:
        return default, False
    assert isinstance(argdict, dict)

    assert isinstance(key, compat_str)
    key = key.lower()

    args = exe_args = None
    if exe is not None:
        assert isinstance(exe, compat_str)
        exe = exe.lower()
        args = argdict.get('%s+%s' % (key, exe))
        if args is None:
            # No combined entry; executable-wide arguments get appended below
            exe_args = argdict.get(exe)

    if args is None:
        # When key == exe the entry was already picked up as exe_args
        args = argdict.get(key) if key != exe else None
    if args is None and exe_args is None:
        args = argdict.get('default', default)

    args, exe_args = args or [], exe_args or []
    assert isinstance(args, (list, tuple))
    assert isinstance(exe_args, (list, tuple))
    # Both lists and tuples are accepted above, but a tuple cannot be
    # concatenated with a list, so normalize before joining
    return list(args) + list(exe_args), False
4702
4703
class ISO639Utils(object):
    """Conversion between two-letter and three-letter language codes"""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """
        Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked up; None is returned
        for unknown codes.
        """
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """
        Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter code (in map iteration order) whose
        three-letter code equals code, or None if there is none.
        """
        candidates = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(candidates, None)
4907
4908
class ISO3166Utils(object):
    """Lookup of full country names by two-letter country code"""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """
        Return the full country name for a two-letter country code

        The lookup is case-insensitive; None is returned for unknown codes.
        """
        country_code = code.upper()
        return cls._country_map.get(country_code)
5167
5168
class GeoUtils(object):
    """Generation of random IPv4 addresses inside a country's address block"""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """
        Return a random IPv4 address (as text) from an address block

        code_or_block is either a two-character country code, looked up
        case-insensitively in the per-country map (None is returned when the
        code has no entry), or anything else, which is taken to be a block
        in 'address/prefix length' notation.
        """
        block = code_or_block
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        network, prefix_len = block.split('/')
        lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
        # Bits not covered by the prefix are free to vary
        host_mask = 0xffffffff >> int(prefix_len)
        picked = random.randint(lowest, lowest | host_mask)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', picked)))
5427
5428
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """
    Proxy handler that lets a single request pick its own proxy through the
    'Ytdl-request-proxy' request header
    """

    def __init__(self, proxies=None):
        # Set default handlers: http_open/https_open call proxy_open() with
        # the '__noproxy__' marker
        for scheme in ('http', 'https'):
            setattr(
                self, scheme + '_open',
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A proxy named in the request header overrides the one passed in;
        # the header is removed from the request once it has been read
        per_request_proxy = req.headers.get('Ytdl-request-proxy')
        if per_request_proxy is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = per_request_proxy

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # Only record the SOCKS proxy on the request; wrapping the socket
            # with socks is done by youtube-dlc's http/https handlers
            req.add_header('Ytdl-socks-proxy', proxy)
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5452
5453
5454 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5455 # released into Public Domain
5456 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5457
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Leading zero bytes are stripped; zero (and any value that is not
    positive) yields a single zero byte.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    # The 32-bit words were collected least significant first
    s = b''.join(reversed(words)).lstrip(b'\000')
    if not s:
        # only happens when the loop above did not run
        s = b'\000'
    if blocksize > 0:
        overhang = len(s) % blocksize
        if overhang:
            s = b'\000' * (blocksize - overhang) + s
    return s
5486
5487
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes to a whole number of 32-bit words
    missing = -len(s) % 4
    if missing:
        s = b'\000' * missing + s
    value = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        value = (value << 32) + word
    return value
5503
5504
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The input bytes are reversed, read as one hexadecimal number and raised
    to exponent modulo modulus.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    reversed_hex = binascii.hexlify(data[::-1])
    payload = int(reversed_hex, 16)
    return '%x' % pow(payload, exponent, modulus)
5520
5521
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + padding + [0] + data, where padding
    consists of at least 8 random non-zero octets.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data is longer than length - 11
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # The padding octets must be non-zero: the first zero octet after the
    # [0, 2] header marks the end of the padding
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + list(data)
5535
5536
def encode_base_n(num, n, table=None):
    """
    Convert an integer to its textual representation in base n

    @param num    integer to convert; negative numbers are rendered as '-'
                  followed by the digits of the absolute value
    @param n      base, at least 2 and at most the length of the digit table
    @param table  digit characters to use; defaults to the first n characters
                  of 0-9, a-z, A-Z
    @returns      the digits of num, most significant first
    @raises ValueError  if n is smaller than 2 or exceeds the table length
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))
    if n < 2:
        # Dividing by a base below 2 never reaches zero
        raise ValueError('base %d is too small, must be at least 2' % n)

    if num == 0:
        return table[0]

    sign = ''
    if num < 0:
        # Floor division of a negative number never reaches zero either, so
        # convert the absolute value and restore the sign afterwards
        sign, num = '-', -num

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    # Digits were produced least significant first
    return sign + ''.join(reversed(digits))
5553
5554
def decode_packed_codes(code):
    """
    Expand code that was packed with a symbol table

    PACKED_CODES_RE (defined elsewhere in this module) must match code and
    capture, in order: the obfuscated code, the numeric base, the number of
    symbols and the '|'-separated symbol list. Every word of the obfuscated
    code is then replaced through a table mapping the base-n rendering of
    each index to the symbol at that index, or to itself when that symbol is
    empty.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    # NOTE(review): assumes count is not negative - confirm PACKED_CODES_RE
    # only captures digits
    for index in range(count - 1, -1, -1):
        word = encode_base_n(index, base)
        symbol_table[word] = symbols[index] or word

    def _lookup(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfuscated_code)
5571
5572
def caesar(s, alphabet, shift):
    """
    Shift every character of s that occurs in alphabet by shift positions
    within alphabet, wrapping around at the ends

    Characters that are not part of alphabet are kept unchanged; with a
    shift of 0 the input is returned as-is.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def _shifted(char):
        try:
            position = alphabet.index(char)
        except ValueError:
            # Not part of the alphabet
            return char
        return alphabet[(position + shift) % size]

    return ''.join(_shifted(char) for char in s)
5580
5581
def rot47(s):
    """Apply caesar() with a shift of 47 over the ASCII characters '!' to '~'"""
    printable = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable, 47)
5584
5585
def parse_m3u8_attributes(attrib):
    """
    Parse a comma-separated KEY=value attribute list into a dict

    Values may be enclosed in double quotes, in which case they may contain
    commas; the surrounding quotes are removed.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return dict(
        (key, val[1:-1] if val.startswith('"') else val)
        for key, val in pairs)
5593
5594
def urshift(val, n):
    """
    Shift val right by n bits; negative values are first offset by 2**32
    (unsigned 32-bit right shift)
    """
    if val < 0:
        val += 0x100000000
    return val >> n
5597
5598
5599 # Based on png2str() written by @gdkchan and improved by @yokrysty
5600 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG byte string into its raw sample values.

    The data must start with the PNG signature followed by an IHDR chunk.
    All IDAT chunks are concatenated and inflated, then every scanline is
    un-filtered (filter types 1-4; any other type leaves the bytes as is).

    Each scanline is read as one filter byte plus `width * 3` sample
    bytes; bit depth and colour type are not inspected, so this
    presumably expects 8-bit RGB images.

    Returns a `(width, height, pixels)` tuple where `pixels` is a list of
    rows, each a flat list of `width * 3` integers.

    Raises IOError when the signature/IHDR check fails or no IDAT data
    is present.
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature, remaining = png_data[:8], png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or remaining[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats = {1: '>B', 2: '>H', 4: '>I'}

    def read_uint(raw):
        return compat_struct_unpack(formats[len(raw)], raw)[0]

    chunks = []
    while remaining:
        size = read_uint(remaining[:4])
        chunks.append({
            'type': remaining[4:8],
            'length': size,
            'data': remaining[8:8 + size],
        })
        # Advance past length (4), type (4), data (size) and CRC (4)
        remaining = remaining[12 + size:]

    ihdr = chunks[0]['data']
    width = read_uint(ihdr[:4])
    height = read_uint(ihdr[4:8])

    idat = b''.join(
        chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    scanlines = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        row_start = y * (1 + stride)
        filter_type = scanlines[row_start]

        previous_row = pixels[y - 1] if y > 0 else None
        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            value = scanlines[1 + row_start + x]
            # Neighbours are the same channel of the pixel to the left
            # (3 samples back) and of the pixel directly above.
            left = current_row[x - 3] if x > 2 else 0
            up = previous_row[x] if y > 0 else 0

            if filter_type == 1:  # Sub
                value = (value + left) & 0xff
            elif filter_type == 2:  # Up
                value = (value + up) & 0xff
            elif filter_type == 3:  # Average
                value = (value + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = previous_row[x - 3] if x > 2 and y > 0 else 0

                estimate = left + up - upper_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    predictor = left
                elif dist_up <= dist_upper_left:
                    predictor = up
                else:
                    predictor = upper_left
                value = (value + predictor) & 0xff

            current_row.append(value)

    return width, height, pixels
5704
5705
def write_xattr(path, key, value):
    """Store `value` as the extended attribute `key` of the file at `path`.

    The Python `xattr` module (either the pyxattr or the xattr flavour)
    is used when it can be imported.  Otherwise the value is written to
    an NTFS Alternate Data Stream on Windows, or handed to the
    `setfattr` / `xattr` command line tools elsewhere.

    `value` is written to a binary file and decoded as UTF-8 in the
    fallback paths, so it is expected to be a byte string.

    Raises XAttrUnavailableError when no usable backend is found (or
    pyxattr is too old) and XAttrMetadataError when setting the
    attribute fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dlc requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        user_has_setfattr = check_executable('setfattr', ['--version'])
        user_has_xattr = check_executable('xattr', ['-h'])

        if not (user_has_setfattr or user_has_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        value = value.decode('utf-8')
        # setfattr is preferred when both tools are present
        if user_has_setfattr:
            executable, opts = 'setfattr', ['-n', key, '-v', value]
        else:
            executable, opts = 'xattr', ['-w', key, value]

        cmd = [encodeFilename(executable, True)]
        cmd.extend(encodeArgument(o) for o in opts)
        cmd.append(encodeFilename(path, True))

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        _, stderr = process_communicate_or_kill(p)
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
5788
5789
def random_birthday(year_field, month_field, day_field):
    """Return a random date between 1950-01-01 and 1995-12-31 (inclusive).

    The result is a dict mapping the three given field names to the
    year, month and day of that date, each converted with str().
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    names = (year_field, month_field, day_field)
    parts = (birthday.year, birthday.month, birthday.day)
    return dict(zip(names, map(str, parts)))
5800
5801
# Templates for internet shortcut files, which are plain text files.
# Every template carries %-style named placeholders (`%(url)s`, and
# `%(filename)s` for the desktop entry); `.lstrip()` removes the newline
# that follows the opening triple quote.

# `[InternetShortcut]` INI-style shortcut.
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# Apple property-list (plist) shortcut holding the URL as a string entry.
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# `[Desktop Entry]` shortcut of type `Link`; also needs a display name.
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5827
5828
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    Spaces are escaped as `%20` in the path, params and fragment (where `+` is a literal plus sign) and as `+` in the query. An explicit port is only omitted when it is port 80 of an `http` IRI. IRIs without a host (e.g. relative references) are converted as well.

    Raises ValueError for IRIs whose network location contains a bracket (IPv6 literals).
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no host part; there is nothing to encode then.
    if iri_parts.hostname is not None:
        net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 is the default for `http` only; dropping it for any other scheme would change the target of the URI.
    port = iri_parts.port
    if port is not None and not (iri_parts.scheme == 'http' and port == 80):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5871
5872
def to_high_limit_path(path):
    """Return `path` in a form that is not subject to MAX_PATH on Windows.

    On `win32` and `cygwin` the absolute path is prefixed with `\\\\?\\`;
    on every other platform `path` is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    prefix = r'\\?\ '.rstrip()
    return prefix + os.path.abspath(path)
5879
5880
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Format `obj[field]` with `template`, or return `default`.

    The value is looked up with `obj.get(field, default)` and, when
    `func` is given and the value is not one of `ignore`, passed through
    `func` first.  If the (possibly converted) value is in `ignore`,
    `default` is returned instead of the formatted string.
    """
    value = obj.get(field, default)
    if func and value not in ignore:
        value = func(value)
    if value in ignore:
        return default
    return template % value
5886
5887
def clean_podcast_url(url):
    """Return `url` with every known podcast tracking/redirect prefix removed."""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
5903
5904
5905 _HEX_TABLE = '0123456789abcdef'
5906
5907
5908 def random_uuidv4():
5909     return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5910
5911
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` if it does not exist yet.

    Returns True when the directory exists afterwards (or `path` has no
    directory component) and False when creating it failed.  On failure
    the error is reported through `to_screen` if that is callable;
    otherwise it is silently ignored.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # `callable()` returns a bool, so it must be tested directly;
        # comparing it against None would always be true.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
5922
5923
def get_executable_path():
    """Return the absolute base directory derived from `sys.argv[0]`.

    When `sys.argv[0]` is the running executable itself (packaged build)
    its directory is returned; otherwise the parent of the directory
    containing `sys.argv[0]` is returned.
    """
    entry_point = sys.argv[0]
    packaged = os.path.abspath(entry_point) == os.path.abspath(sys.executable)
    base = os.path.dirname(entry_point)
    if not packaged:
        base = os.path.join(base, '..')
    return os.path.abspath(base)
5929
5930
def load_plugins(name, type, namespace):
    """Load plugin classes from the `ytdlp_plugins` directory.

    The module `name` is looked up in the `ytdlp_plugins` directory below
    get_executable_path().  Every attribute of that module whose name
    ends with `type` is stored in `namespace` under its own name and
    collected in the returned list.  An ImportError is swallowed, in
    which case whatever was collected so far (usually nothing) is
    returned.
    """
    module_info = [None]
    classes = []
    try:
        search_dir = os.path.join(get_executable_path(), 'ytdlp_plugins')
        module_info = imp.find_module(name, [search_dir])
        module = imp.load_module(name, *module_info)
        for attr_name in dir(module):
            if attr_name.endswith(type):
                klass = getattr(module, attr_name)
                classes.append(klass)
                namespace[attr_name] = klass
    except ImportError:
        pass
    finally:
        # First item of the find_module() result is the open file, if any
        handle = module_info[0]
        if handle is not None:
            handle.close()
    return classes
5950
5951
def traverse_dict(dictn, keys, casesense=True):
    """Follow `keys` through nested dicts starting at `dictn`.

    Returns the value found under the last key, or None as soon as a
    level is not a dict or lacks the key.  With `casesense=False` both
    the dict keys and the looked-up keys are lower-cased before
    comparison.
    """
    current = dictn
    remaining = keys
    while True:
        if not isinstance(current, dict):
            return None
        wanted = remaining[0]
        if not casesense:
            current = {key.lower(): val for key, val in current.items()}
            wanted = wanted.lower()
        current = current.get(wanted, None)
        if len(remaining) < 2:
            return current
        remaining = remaining[1:]