4 from __future__
import unicode_literals
36 import xml
.etree
.ElementTree
40 compat_HTMLParseError
,
45 compat_ctypes_WINFUNCTYPE
,
46 compat_etree_fromstring
,
49 compat_html_entities_html5
,
61 compat_urllib_parse_urlencode
,
62 compat_urllib_parse_urlparse
,
63 compat_urllib_parse_urlunparse
,
64 compat_urllib_parse_quote
,
65 compat_urllib_parse_quote_plus
,
66 compat_urllib_parse_unquote_plus
,
67 compat_urllib_request
,
def register_socks_protocols():
    """Make urlsplit() treat the SOCKS proxy schemes as having a netloc.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly, so the SOCKS schemes
    are appended to that registry here (idempotently).
    """
    registry = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme not in registry:
            registry.append(socks_scheme)
# This is not clearly defined otherwise
# (older Pythons expose no public name for the type of a compiled regex;
# presumably this is used for isinstance() checks elsewhere — confirm
# against callers)
compiled_regex_type = type(re.compile(''))
91 def random_user_agent():
92 _USER_AGENT_TPL
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
1671 return _USER_AGENT_TPL
% random
.choice(_CHROME_VERSIONS
)
1675 'User-Agent': random_user_agent(),
1676 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1677 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1678 'Accept-Encoding': 'gzip, deflate',
1679 'Accept-Language': 'en-us,en;q=0.5',
1684 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
# Sentinel distinguishing "no default supplied" from an explicit default of
# None; the xpath_* helpers below compare against it with `is not NO_DEFAULT`.
NO_DEFAULT = object()
# English month names in calendar order; also reused as the 'en' entry of
# the per-language month-name table defined below.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1695 'en': ENGLISH_MONTH_NAMES
,
1697 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1698 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
1701 KNOWN_EXTENSIONS
= (
1702 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1703 'flv', 'f4v', 'f4a', 'f4b',
1704 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1705 'mkv', 'mka', 'mk3d',
1708 'asf', 'wmv', 'wma',
1714 'f4f', 'f4m', 'm3u8', 'smil')
# needed for sanitizing filenames in restricted mode
# Maps each accented/special character to an ASCII transliteration. zip()
# pairs the characters of the first string with the flat sequence produced by
# itertools.chain(); multi-character replacements ('AE', 'OE', 'TH', 'ss', …)
# are wrapped in one-element lists so chain() yields them whole instead of
# splitting them into single letters.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1742 '%Y/%m/%d %H:%M:%S',
1744 '%Y-%m-%d %H:%M:%S',
1745 '%Y-%m-%d %H:%M:%S.%f',
1748 '%Y-%m-%dT%H:%M:%SZ',
1749 '%Y-%m-%dT%H:%M:%S.%fZ',
1750 '%Y-%m-%dT%H:%M:%S.%f0Z',
1751 '%Y-%m-%dT%H:%M:%S',
1752 '%Y-%m-%dT%H:%M:%S.%f',
1754 '%b %d %Y at %H:%M',
1755 '%b %d %Y at %H:%M:%S',
1756 '%B %d %Y at %H:%M',
1757 '%B %d %Y at %H:%M:%S',
1760 DATE_FORMATS_DAY_FIRST
= list(DATE_FORMATS
)
1761 DATE_FORMATS_DAY_FIRST
.extend([
1767 '%d/%m/%Y %H:%M:%S',
1770 DATE_FORMATS_MONTH_FIRST
= list(DATE_FORMATS
)
1771 DATE_FORMATS_MONTH_FIRST
.extend([
1776 '%m/%d/%Y %H:%M:%S',
# Matches the trailing call of "packed" (eval-compressed) JavaScript,
# capturing the payload, radix, symbol count and the '|'-separated symbol
# table — apparently the Dean Edwards p.a.c.k.e.r. format; confirm against
# the decoder that consumes these groups.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element; group 1 captures the
# (optional) quote around the type value and \1 requires the same closing
# quote, while the JSON payload lands in the named group 'json_ld'.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1783 def preferredencoding():
1784 """Get preferred encoding.
1786 Returns the best encoding scheme for the system, based on
1787 locale.getpreferredencoding() and some further tweaks.
1790 pref = locale.getpreferredencoding()
1798 def write_json_file(obj, fn):
1799 """ Encode obj as JSON and write it to fn, atomically if possible """
1801 fn = encodeFilename(fn)
1802 if sys.version_info < (3, 0) and sys.platform != 'win32
':
1803 encoding = get_filesystem_encoding()
1804 # os.path.basename returns a bytes object, but NamedTemporaryFile
1805 # will fail if the filename contains non ascii characters unless we
1806 # use a unicode object
1807 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1808 # the same for os.path.dirname
1809 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1811 path_basename = os.path.basename
1812 path_dirname = os.path.dirname
1816 'prefix
': path_basename(fn) + '.',
1817 'dir': path_dirname(fn),
1821 # In Python 2.x, json.dump expects a bytestream.
1822 # In Python 3.x, it writes to a character stream
1823 if sys.version_info < (3, 0):
1828 'encoding
': 'utf
-8',
1831 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1836 if sys.platform == 'win32
':
1837 # Need to remove existing file on Windows, else os.rename raises
1838 # WindowsError or FileExistsError.
1846 os.chmod(tf.name, 0o666 & ~mask)
1849 os.rename(tf.name, fn)
1858 if sys.version_info >= (2, 7):
1859 def find_xpath_attr(node, xpath, key, val=None):
1860 """ Find the xpath xpath[@key=val] """
1861 assert re.match(r'^
[a
-zA
-Z_
-]+$
', key)
1862 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1863 return node.find(expr)
1865 def find_xpath_attr(node, xpath, key, val=None):
1866 for f in node.findall(compat_xpath(xpath)):
1867 if key not in f.attrib:
1869 if val is None or f.attrib.get(key) == val:
1873 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1874 # the namespace parameter
1877 def xpath_with_ns(path
, ns_map
):
1878 components
= [c
.split(':') for c
in path
.split('/')]
1880 for c
in components
:
1882 replaced
.append(c
[0])
1885 replaced
.append('{%s}%s' % (ns_map
[ns
], tag
))
1886 return '/'.join(replaced
)
1889 def xpath_element(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1890 def _find_xpath(xpath
):
1891 return node
.find(compat_xpath(xpath
))
1893 if isinstance(xpath
, (str, compat_str
)):
1894 n
= _find_xpath(xpath
)
1902 if default
is not NO_DEFAULT
:
1905 name
= xpath
if name
is None else name
1906 raise ExtractorError('Could not find XML element %s' % name
)
1912 def xpath_text(node
, xpath
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1913 n
= xpath_element(node
, xpath
, name
, fatal
=fatal
, default
=default
)
1914 if n
is None or n
== default
:
1917 if default
is not NO_DEFAULT
:
1920 name
= xpath
if name
is None else name
1921 raise ExtractorError('Could not find XML element\'s text %s' % name
)
1927 def xpath_attr(node
, xpath
, key
, name
=None, fatal
=False, default
=NO_DEFAULT
):
1928 n
= find_xpath_attr(node
, xpath
, key
)
1930 if default
is not NO_DEFAULT
:
1933 name
= '%s[@%s]' % (xpath
, key
) if name
is None else name
1934 raise ExtractorError('Could not find XML attribute %s' % name
)
1937 return n
.attrib
[key
]
def get_element_by_id(id, html):
    """Return the inner content of the first tag in *html* whose id equals *id*.

    Thin convenience wrapper around get_element_by_attribute() with the
    attribute name fixed to 'id'.
    """
    return get_element_by_attribute('id', id, html)
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying *class_name*, or None.

    Delegates to get_elements_by_class() and keeps only the first hit.
    """
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose *attribute* matches *value*.

    Returns None when nothing matches. *escape_value* is forwarded to
    get_elements_by_attribute() and controls regex-escaping of *value*.
    """
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return next(iter(matches), None)
def get_elements_by_class(class_name, html):
    """Return the contents of all tags whose class attribute contains *class_name*.

    Builds a word-boundary pattern so the class name must appear as a whole
    token inside the quoted class list, then defers to
    get_elements_by_attribute() with escaping disabled (the pattern is
    already a regex).
    """
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1963 def get_elements_by_attribute(attribute, value, html, escape_value=True):
1964 """Return the content of the tag with the specified attribute in the passed HTML document"""
1966 value = re.escape(value) if escape_value else value
1969 for m in re.finditer(r'''(?xs)
1971 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^
']*'|
))*?
1973 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^
"]*"|
='[^']*'|))*?
1977 ''' % (re.escape(attribute), value), html):
1978 res = m.group('content
')
1980 if res.startswith('"') or res.startswith("'"):
1983 retlist.append(unescapeHTML(res))
1988 class HTMLAttributeParser(compat_HTMLParser):
1989 """Trivial HTML parser to gather the attributes for a single element"""
1993 compat_HTMLParser.__init__(self)
1995 def handle_starttag(self, tag, attrs):
1996 self.attrs = dict(attrs)
1999 def extract_attributes(html_element):
2000 """Given a string for an HTML element such as
2002 a="foo" B="bar" c="&98;az" d=boz
2003 empty= noval entity="&"
2006 Decode and return a dictionary of attributes.
2008 'a
': 'foo
', 'b
': 'bar
', c: 'baz
', d: 'boz
',
2009 'empty
': '', 'noval
': None, 'entity
': '&',
2010 'sq
': '"', 'dq': '\''
2012 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2013 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2015 parser = HTMLAttributeParser()
2017 parser.feed(html_element)
2019 # Older Python may throw HTMLParseError in case of malformed HTML
2020 except compat_HTMLParseError:
2025 def clean_html(html):
2026 """Clean an HTML snippet into a readable string"""
2028 if html is None: # Convenience for sanitizing descriptions etc.
2032 html = html.replace('\n', ' ')
2033 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2034 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2036 html = re.sub('<.*?>', '', html)
2037 # Replace html entities
2038 html = unescapeHTML(html)
2042 def sanitize_open(filename, open_mode):
2043 """Try to open the given filename, and slightly tweak it if this fails.
2045 Attempts to open the given filename. If this fails, it tries to change
2046 the filename slightly, step by step, until it's either able to open it
2047 or it fails and raises a final exception, like the standard open()
2050 It returns the tuple (stream, definitive_file_name).
2054 if sys.platform == 'win32':
2056 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
2057 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2058 stream = open(encodeFilename(filename), open_mode)
2059 return (stream, filename)
2060 except (IOError, OSError) as err:
2061 if err.errno in (errno.EACCES,):
2064 # In case of error, try to remove win32 forbidden chars
2065 alt_filename = sanitize_path(filename)
2066 if alt_filename == filename:
2069 # An exception here should be caught in the caller
2070 stream = open(encodeFilename(alt_filename), open_mode)
2071 return (stream, alt_filename)
2074 def timeconvert(timestr):
2075 """Convert RFC 2822 defined time string into system timestamp"""
2077 timetuple = email.utils.parsedate_tz(timestr)
2078 if timetuple is not None:
2079 timestamp = email.utils.mktime_tz(timetuple)
2083 def sanitize_filename(s, restricted=False, is_id=False):
2084 """Sanitizes a string so it could be used as part of a filename.
2085 If restricted is set, use a stricter subset of allowed characters.
2086 Set is_id if this is not an arbitrary string, but an ID that should be kept
2089 def replace_insane(char):
2090 if restricted and char in ACCENT_CHARS:
2091 return ACCENT_CHARS[char]
2092 if char == '?' or ord(char) < 32 or ord(char) == 127:
2095 return '' if restricted else '\''
2097 return '_
-' if restricted else ' -'
2098 elif char in '\\/|
*<>':
2100 if restricted and (char in '!&\'()[]{}$
;`^
,#' or char.isspace()):
2102 if restricted
and ord(char
) > 127:
2107 s
= re
.sub(r
'[0-9]+(?::[0-9]+)+', lambda m
: m
.group(0).replace(':', '_'), s
)
2108 result
= ''.join(map(replace_insane
, s
))
2110 while '__' in result
:
2111 result
= result
.replace('__', '_')
2112 result
= result
.strip('_')
2113 # Common case of "Foreign band name - English song title"
2114 if restricted
and result
.startswith('-_'):
2116 if result
.startswith('-'):
2117 result
= '_' + result
[len('-'):]
2118 result
= result
.lstrip('.')
2124 def sanitize_path(s
):
2125 """Sanitizes and normalizes path on Windows"""
2126 if sys
.platform
!= 'win32':
2128 drive_or_unc
, _
= os
.path
.splitdrive(s
)
2129 if sys
.version_info
< (2, 7) and not drive_or_unc
:
2130 drive_or_unc
, _
= os
.path
.splitunc(s
)
2131 norm_path
= os
.path
.normpath(remove_start(s
, drive_or_unc
)).split(os
.path
.sep
)
2135 path_part
if path_part
in ['.', '..'] else re
.sub(r
'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part
)
2136 for path_part
in norm_path
]
2138 sanitized_path
.insert(0, drive_or_unc
+ os
.path
.sep
)
2139 return os
.path
.join(*sanitized_path
)
2142 def sanitize_url(url
):
2143 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2144 # the number of unwanted failures due to missing protocol
2145 if url
.startswith('//'):
2146 return 'http:%s' % url
2147 # Fix some common typos seen so far
2149 # https://github.com/ytdl-org/youtube-dl/issues/15649
2150 (r
'^httpss://', r
'https://'),
2151 # https://bx1.be/lives/direct-tv/
2152 (r
'^rmtp([es]?)://', r
'rtmp\1://'),
2154 for mistake
, fixup
in COMMON_TYPOS
:
2155 if re
.match(mistake
, url
):
2156 return re
.sub(mistake
, fixup
, url
)
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request for the sanitized form of *url*.

    Extra positional/keyword arguments are passed through to Request().
    """
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2165 """Expand shell variables and ~"""
2166 return os
.path
.expandvars(compat_expanduser(s
))
2169 def orderedSet(iterable
):
2170 """ Remove all duplicates from the input iterable """
2178 def _htmlentity_transform(entity_with_semicolon
):
2179 """Transforms an HTML entity to a character."""
2180 entity
= entity_with_semicolon
[:-1]
2182 # Known non-numeric HTML entity
2183 if entity
in compat_html_entities
.name2codepoint
:
2184 return compat_chr(compat_html_entities
.name2codepoint
[entity
])
2186 # TODO: HTML5 allows entities without a semicolon. For example,
2187 # 'Éric' should be decoded as 'Éric'.
2188 if entity_with_semicolon
in compat_html_entities_html5
:
2189 return compat_html_entities_html5
[entity_with_semicolon
]
2191 mobj
= re
.match(r
'#(x[0-9a-fA-F]+|[0-9]+)', entity
)
2192 if mobj
is not None:
2193 numstr
= mobj
.group(1)
2194 if numstr
.startswith('x'):
2196 numstr
= '0%s' % numstr
2199 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2201 return compat_chr(int(numstr
, base
))
2205 # Unknown entity in name, return its literal representation
2206 return '&%s;' % entity
2209 def unescapeHTML(s
):
2212 assert type(s
) == compat_str
2215 r
'&([^&;]+;)', lambda m
: _htmlentity_transform(m
.group(1)), s
)
2218 def process_communicate_or_kill(p
, *args
, **kwargs
):
2220 return p
.communicate(*args
, **kwargs
)
2221 except BaseException
: # Including KeyboardInterrupt
2227 def get_subprocess_encoding():
2228 if sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2229 # For subprocess calls, encode with locale encoding
2230 # Refer to http://stackoverflow.com/a/9951851/35070
2231 encoding
= preferredencoding()
2233 encoding
= sys
.getfilesystemencoding()
2234 if encoding
is None:
2239 def encodeFilename(s
, for_subprocess
=False):
2241 @param s The name of the file
2244 assert type(s
) == compat_str
2246 # Python 3 has a Unicode API
2247 if sys
.version_info
>= (3, 0):
2250 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2251 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2252 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2253 if not for_subprocess
and sys
.platform
== 'win32' and sys
.getwindowsversion()[0] >= 5:
2256 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2257 if sys
.platform
.startswith('java'):
2260 return s
.encode(get_subprocess_encoding(), 'ignore')
2263 def decodeFilename(b
, for_subprocess
=False):
2265 if sys
.version_info
>= (3, 0):
2268 if not isinstance(b
, bytes):
2271 return b
.decode(get_subprocess_encoding(), 'ignore')
def encodeArgument(s):
    """Encode a command-line argument via encodeFilename(..., for_subprocess=True).

    Text input is encoded directly; byte-string input is legacy and is first
    decoded as ASCII.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
def decodeArgument(b):
    """Counterpart of encodeArgument(): decode a subprocess argument to text."""
    return decodeFilename(b, True)
2287 def decodeOption(optval
):
2290 if isinstance(optval
, bytes):
2291 optval
= optval
.decode(preferredencoding())
2293 assert isinstance(optval
, compat_str
)
2297 def formatSeconds(secs
, delim
=':'):
2299 return '%d%s%02d%s%02d' % (secs
// 3600, delim
, (secs
% 3600) // 60, delim
, secs
% 60)
2301 return '%d%s%02d' % (secs
// 60, delim
, secs
% 60)
2306 def make_HTTPS_handler(params
, **kwargs
):
2307 opts_no_check_certificate
= params
.get('nocheckcertificate', False)
2308 if hasattr(ssl
, 'create_default_context'): # Python >= 3.4 or 2.7.9
2309 context
= ssl
.create_default_context(ssl
.Purpose
.SERVER_AUTH
)
2310 if opts_no_check_certificate
:
2311 context
.check_hostname
= False
2312 context
.verify_mode
= ssl
.CERT_NONE
2314 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2317 # (create_default_context present but HTTPSHandler has no context=)
2320 if sys
.version_info
< (3, 2):
2321 return YoutubeDLHTTPSHandler(params
, **kwargs
)
2322 else: # Python < 3.4
2323 context
= ssl
.SSLContext(ssl
.PROTOCOL_TLSv1
)
2324 context
.verify_mode
= (ssl
.CERT_NONE
2325 if opts_no_check_certificate
2326 else ssl
.CERT_REQUIRED
)
2327 context
.set_default_verify_paths()
2328 return YoutubeDLHTTPSHandler(params
, context
=context
, **kwargs
)
2331 def bug_reports_message():
2332 if ytdl_is_updateable():
2333 update_cmd
= 'type youtube-dlc -U to update'
2335 update_cmd
= 'see https://github.com/pukkandan/yt-dlp on how to update'
2336 msg
= '; please report this issue on https://github.com/pukkandan/yt-dlp .'
2337 msg
+= ' Make sure you are using the latest version; %s.' % update_cmd
2338 msg
+= ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.'
class YoutubeDLError(Exception):
    """Root of the YoutubeDL exception hierarchy; all project errors derive from it."""
2347 class ExtractorError(YoutubeDLError
):
2348 """Error during info extraction."""
2350 def __init__(self
, msg
, tb
=None, expected
=False, cause
=None, video_id
=None):
2351 """ tb, if given, is the original traceback (so that it can be printed out).
2352 If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
2355 if sys
.exc_info()[0] in (compat_urllib_error
.URLError
, socket
.timeout
, UnavailableVideoError
):
2357 if video_id
is not None:
2358 msg
= video_id
+ ': ' + msg
2360 msg
+= ' (caused by %r)' % cause
2362 msg
+= bug_reports_message()
2363 super(ExtractorError
, self
).__init
__(msg
)
2366 self
.exc_info
= sys
.exc_info() # preserve original exception
2368 self
.video_id
= video_id
2370 def format_traceback(self
):
2371 if self
.traceback
is None:
2373 return ''.join(traceback
.format_tb(self
.traceback
))
class UnsupportedError(ExtractorError):
    """Raised when no extractor can handle the given URL."""

    def __init__(self, url):
        # An unsupported URL is an expected condition, not an extractor bug,
        # hence expected=True.
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match (e.g. a mandatory field search failed)."""
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Geo restriction is an expected condition, not an extractor bug.
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        # Country codes from which the video is reachable, when known.
        self.countries = countries
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
2424 class PostProcessingError(YoutubeDLError
):
2425 """Post Processing exception.
2427 This exception may be raised by PostProcessor's .run() method to
2428 indicate an error in the postprocessing task.
2431 def __init__(self
, msg
):
2432 super(PostProcessingError
, self
).__init
__(msg
)
class ExistingVideoReached(YoutubeDLError):
    """Raised when an already-downloaded video is encountered.

    NOTE(review): the original docstring read '--max-downloads limit has
    been reached.', apparently copy-pasted from MaxDownloadsReached below.
    Per the class name this signals hitting a video that already exists
    (e.g. for --break-on-existing) — confirm against the raising callers.
    """
class RejectedVideoReached(YoutubeDLError):
    """Raised when a video rejected by the configured filters is encountered.

    NOTE(review): the original docstring read '--max-downloads limit has
    been reached.', apparently copy-pasted from MaxDownloadsReached below.
    Per the class name this signals reaching a rejected video — confirm
    against the raising callers.
    """
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached; stop downloading further videos. """
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    Raised by FileDownloader objects when a downloaded file is smaller than
    what the server announced first, indicating the connection was probably
    interrupted.
    """

    def __init__(self, downloaded, expected):
        # Keep both byte counts accessible to callers for diagnostics.
        self.downloaded = downloaded
        self.expected = expected
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
2477 class XAttrMetadataError(YoutubeDLError
):
2478 def __init__(self
, code
=None, msg
='Unknown error'):
2479 super(XAttrMetadataError
, self
).__init
__(msg
)
2483 # Parsing code and msg
2484 if (self
.code
in (errno
.ENOSPC
, errno
.EDQUOT
)
2485 or 'No space left' in self
.msg
or 'Disk quota exceeded' in self
.msg
):
2486 self
.reason
= 'NO_SPACE'
2487 elif self
.code
== errno
.E2BIG
or 'Argument list too long' in self
.msg
:
2488 self
.reason
= 'VALUE_TOO_LONG'
2490 self
.reason
= 'NOT_SUPPORTED'
class XAttrUnavailableError(YoutubeDLError):
    """Raised when extended-attribute (xattr) support is unavailable.

    NOTE(review): inferred from the class name — the class body is not
    visible in this chunk; confirm against the raising callers.
    """
2497 def _create_http_connection(ydl_handler
, http_class
, is_https
, *args
, **kwargs
):
2498 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2499 # expected HTTP responses to meet HTTP/1.0 or later (see also
2500 # https://github.com/ytdl-org/youtube-dl/issues/6727)
2501 if sys
.version_info
< (3, 0):
2502 kwargs
['strict'] = True
2503 hc
= http_class(*args
, **compat_kwargs(kwargs
))
2504 source_address
= ydl_handler
._params
.get('source_address')
2506 if source_address
is not None:
2507 # This is to workaround _create_connection() from socket where it will try all
2508 # address data from getaddrinfo() including IPv6. This filters the result from
2509 # getaddrinfo() based on the source_address value.
2510 # This is based on the cpython socket.create_connection() function.
2511 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2512 def _create_connection(address
, timeout
=socket
._GLOBAL
_DEFAULT
_TIMEOUT
, source_address
=None):
2513 host
, port
= address
2515 addrs
= socket
.getaddrinfo(host
, port
, 0, socket
.SOCK_STREAM
)
2516 af
= socket
.AF_INET
if '.' in source_address
[0] else socket
.AF_INET6
2517 ip_addrs
= [addr
for addr
in addrs
if addr
[0] == af
]
2518 if addrs
and not ip_addrs
:
2519 ip_version
= 'v4' if af
== socket
.AF_INET
else 'v6'
2521 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2522 % (ip_version
, source_address
[0]))
2523 for res
in ip_addrs
:
2524 af
, socktype
, proto
, canonname
, sa
= res
2527 sock
= socket
.socket(af
, socktype
, proto
)
2528 if timeout
is not socket
._GLOBAL
_DEFAULT
_TIMEOUT
:
2529 sock
.settimeout(timeout
)
2530 sock
.bind(source_address
)
2532 err
= None # Explicitly break reference cycle
2534 except socket
.error
as _
:
2536 if sock
is not None:
2541 raise socket
.error('getaddrinfo returns an empty list')
2542 if hasattr(hc
, '_create_connection'):
2543 hc
._create
_connection
= _create_connection
2544 sa
= (source_address
, 0)
2545 if hasattr(hc
, 'source_address'): # Python 2.7+
2546 hc
.source_address
= sa
2548 def _hc_connect(self
, *args
, **kwargs
):
2549 sock
= _create_connection(
2550 (self
.host
, self
.port
), self
.timeout
, sa
)
2552 self
.sock
= ssl
.wrap_socket(
2553 sock
, self
.key_file
, self
.cert_file
,
2554 ssl_version
=ssl
.PROTOCOL_TLSv1
)
2557 hc
.connect
= functools
.partial(_hc_connect
, hc
)
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker from *headers*.

    When the marker header is present, return a copy of the mapping with
    every Accept-Encoding header (matched case-insensitively) removed along
    with the marker itself, so the real request is sent without compression
    negotiation. When the marker is absent, the original mapping is returned
    unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    filtered = {}
    for name, value in headers.items():
        # Drop any casing variant of Accept-Encoding.
        if name.lower() != 'accept-encoding':
            filtered[name] = value
    del filtered['Youtubedl-no-compression']
    return filtered
2572 class YoutubeDLHandler(compat_urllib_request
.HTTPHandler
):
2573 """Handler for HTTP requests and responses.
2575 This class, when installed with an OpenerDirector, automatically adds
2576 the standard headers to every HTTP request and handles gzipped and
2577 deflated responses from web servers. If compression is to be avoided in
2578 a particular request, the original request in the program code only has
2579 to include the HTTP header "Youtubedl-no-compression", which will be
2580 removed before making the real request.
2582 Part of this code was copied from:
2584 http://techknack.net/python-urllib2-handlers/
2586 Andrew Rowls, the author of that code, agreed to release it to the
2590 def __init__(self
, params
, *args
, **kwargs
):
2591 compat_urllib_request
.HTTPHandler
.__init
__(self
, *args
, **kwargs
)
2592 self
._params
= params
2594 def http_open(self
, req
):
2595 conn_class
= compat_http_client
.HTTPConnection
2597 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2599 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2600 del req
.headers
['Ytdl-socks-proxy']
2602 return self
.do_open(functools
.partial(
2603 _create_http_connection
, self
, conn_class
, False),
2609 return zlib
.decompress(data
, -zlib
.MAX_WBITS
)
2611 return zlib
.decompress(data
)
2613 def http_request(self
, req
):
2614 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2615 # always respected by websites, some tend to give out URLs with non percent-encoded
2616 # non-ASCII characters (see telemb.py, ard.py [#3412])
2617 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2618 # To work around aforementioned issue we will replace request's original URL with
2619 # percent-encoded one
2620 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2621 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2622 url
= req
.get_full_url()
2623 url_escaped
= escape_url(url
)
2625 # Substitute URL if any change after escaping
2626 if url
!= url_escaped
:
2627 req
= update_Request(req
, url
=url_escaped
)
2629 for h
, v
in std_headers
.items():
2630 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2631 # The dict keys are capitalized because of this bug by urllib
2632 if h
.capitalize() not in req
.headers
:
2633 req
.add_header(h
, v
)
2635 req
.headers
= handle_youtubedl_headers(req
.headers
)
2637 if sys
.version_info
< (2, 7) and '#' in req
.get_full_url():
2638 # Python 2.6 is brain-dead when it comes to fragments
2639 req
._Request
__original
= req
._Request
__original
.partition('#')[0]
2640 req
._Request
__r
_type
= req
._Request
__r
_type
.partition('#')[0]
2644 def http_response(self
, req
, resp
):
2647 if resp
.headers
.get('Content-encoding', '') == 'gzip':
2648 content
= resp
.read()
2649 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
), mode
='rb')
2651 uncompressed
= io
.BytesIO(gz
.read())
2652 except IOError as original_ioerror
:
2653 # There may be junk add the end of the file
2654 # See http://stackoverflow.com/q/4928560/35070 for details
2655 for i
in range(1, 1024):
2657 gz
= gzip
.GzipFile(fileobj
=io
.BytesIO(content
[:-i
]), mode
='rb')
2658 uncompressed
= io
.BytesIO(gz
.read())
2663 raise original_ioerror
2664 resp
= compat_urllib_request
.addinfourl(uncompressed
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2665 resp
.msg
= old_resp
.msg
2666 del resp
.headers
['Content-encoding']
2668 if resp
.headers
.get('Content-encoding', '') == 'deflate':
2669 gz
= io
.BytesIO(self
.deflate(resp
.read()))
2670 resp
= compat_urllib_request
.addinfourl(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
)
2671 resp
.msg
= old_resp
.msg
2672 del resp
.headers
['Content-encoding']
2673 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2674 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2675 if 300 <= resp
.code
< 400:
2676 location
= resp
.headers
.get('Location')
2678 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2679 if sys
.version_info
>= (3, 0):
2680 location
= location
.encode('iso-8859-1').decode('utf-8')
2682 location
= location
.decode('utf-8')
2683 location_escaped
= escape_url(location
)
2684 if location
!= location_escaped
:
2685 del resp
.headers
['Location']
2686 if sys
.version_info
< (3, 0):
2687 location_escaped
= location_escaped
.encode('utf-8')
2688 resp
.headers
['Location'] = location_escaped
2691 https_request
= http_request
2692 https_response
= http_response
2695 def make_socks_conn_class(base_class
, socks_proxy
):
2696 assert issubclass(base_class
, (
2697 compat_http_client
.HTTPConnection
, compat_http_client
.HTTPSConnection
))
2699 url_components
= compat_urlparse
.urlparse(socks_proxy
)
2700 if url_components
.scheme
.lower() == 'socks5':
2701 socks_type
= ProxyType
.SOCKS5
2702 elif url_components
.scheme
.lower() in ('socks', 'socks4'):
2703 socks_type
= ProxyType
.SOCKS4
2704 elif url_components
.scheme
.lower() == 'socks4a':
2705 socks_type
= ProxyType
.SOCKS4A
2707 def unquote_if_non_empty(s
):
2710 return compat_urllib_parse_unquote_plus(s
)
2714 url_components
.hostname
, url_components
.port
or 1080,
2716 unquote_if_non_empty(url_components
.username
),
2717 unquote_if_non_empty(url_components
.password
),
2720 class SocksConnection(base_class
):
2722 self
.sock
= sockssocket()
2723 self
.sock
.setproxy(*proxy_args
)
2724 if type(self
.timeout
) in (int, float):
2725 self
.sock
.settimeout(self
.timeout
)
2726 self
.sock
.connect((self
.host
, self
.port
))
2728 if isinstance(self
, compat_http_client
.HTTPSConnection
):
2729 if hasattr(self
, '_context'): # Python > 2.6
2730 self
.sock
= self
._context
.wrap_socket(
2731 self
.sock
, server_hostname
=self
.host
)
2733 self
.sock
= ssl
.wrap_socket(self
.sock
)
2735 return SocksConnection
2738 class YoutubeDLHTTPSHandler(compat_urllib_request
.HTTPSHandler
):
2739 def __init__(self
, params
, https_conn_class
=None, *args
, **kwargs
):
2740 compat_urllib_request
.HTTPSHandler
.__init
__(self
, *args
, **kwargs
)
2741 self
._https
_conn
_class
= https_conn_class
or compat_http_client
.HTTPSConnection
2742 self
._params
= params
2744 def https_open(self
, req
):
2746 conn_class
= self
._https
_conn
_class
2748 if hasattr(self
, '_context'): # python > 2.6
2749 kwargs
['context'] = self
._context
2750 if hasattr(self
, '_check_hostname'): # python 3.x
2751 kwargs
['check_hostname'] = self
._check
_hostname
2753 socks_proxy
= req
.headers
.get('Ytdl-socks-proxy')
2755 conn_class
= make_socks_conn_class(conn_class
, socks_proxy
)
2756 del req
.headers
['Ytdl-socks-proxy']
2758 return self
.do_open(functools
.partial(
2759 _create_http_connection
, self
, conn_class
, True),
2763 class YoutubeDLCookieJar(compat_cookiejar
.MozillaCookieJar
):
2765 See [1] for cookie file format.
2767 1. https://curl.haxx.se/docs/http-cookies.html
2769 _HTTPONLY_PREFIX
= '#HttpOnly_'
2771 _HEADER
= '''# Netscape HTTP Cookie File
2772 # This file is generated by youtube-dlc. Do not edit.
2775 _CookieFileEntry
= collections
.namedtuple(
2777 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
2779 def save(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2781 Save cookies to a file.
2783 Most of the code is taken from CPython 3.8 and slightly adapted
2784 to support cookie files with UTF-8 in both python 2 and 3.
2786 if filename
is None:
2787 if self
.filename
is not None:
2788 filename
= self
.filename
2790 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2792 # Store session cookies with `expires` set to 0 instead of an empty
2795 if cookie
.expires
is None:
2798 with io
.open(filename
, 'w', encoding
='utf-8') as f
:
2799 f
.write(self
._HEADER
)
2802 if not ignore_discard
and cookie
.discard
:
2804 if not ignore_expires
and cookie
.is_expired(now
):
2810 if cookie
.domain
.startswith('.'):
2811 initial_dot
= 'TRUE'
2813 initial_dot
= 'FALSE'
2814 if cookie
.expires
is not None:
2815 expires
= compat_str(cookie
.expires
)
2818 if cookie
.value
is None:
2819 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2820 # with no name, whereas http.cookiejar regards it as a
2821 # cookie with no value.
2826 value
= cookie
.value
2828 '\t'.join([cookie
.domain
, initial_dot
, cookie
.path
,
2829 secure
, expires
, name
, value
]) + '\n')
2831 def load(self
, filename
=None, ignore_discard
=False, ignore_expires
=False):
2832 """Load cookies from a file."""
2833 if filename
is None:
2834 if self
.filename
is not None:
2835 filename
= self
.filename
2837 raise ValueError(compat_cookiejar
.MISSING_FILENAME_TEXT
)
2839 def prepare_line(line
):
2840 if line
.startswith(self
._HTTPONLY
_PREFIX
):
2841 line
= line
[len(self
._HTTPONLY
_PREFIX
):]
2842 # comments and empty lines are fine
2843 if line
.startswith('#') or not line
.strip():
2845 cookie_list
= line
.split('\t')
2846 if len(cookie_list
) != self
._ENTRY
_LEN
:
2847 raise compat_cookiejar
.LoadError('invalid length %d' % len(cookie_list
))
2848 cookie
= self
._CookieFileEntry
(*cookie_list
)
2849 if cookie
.expires_at
and not cookie
.expires_at
.isdigit():
2850 raise compat_cookiejar
.LoadError('invalid expires at %s' % cookie
.expires_at
)
2854 with io
.open(filename
, encoding
='utf-8') as f
:
2857 cf
.write(prepare_line(line
))
2858 except compat_cookiejar
.LoadError
as e
:
2860 'WARNING: skipping cookie file entry due to %s: %r\n'
2861 % (e
, line
), sys
.stderr
)
2864 self
._really
_load
(cf
, filename
, ignore_discard
, ignore_expires
)
2865 # Session cookies are denoted by either `expires` field set to
2866 # an empty string or 0. MozillaCookieJar only recognizes the former
2867 # (see [1]). So we need force the latter to be recognized as session
2868 # cookies on our own.
2869 # Session cookies may be important for cookies-based authentication,
2870 # e.g. usually, when user does not check 'Remember me' check box while
2871 # logging in on a site, some important cookies are stored as session
2872 # cookies so that not recognizing them will result in failed login.
2873 # 1. https://bugs.python.org/issue17164
2875 # Treat `expires=0` cookies as session cookies
2876 if cookie
.expires
== 0:
2877 cookie
.expires
= None
2878 cookie
.discard
= True
2881 class YoutubeDLCookieProcessor(compat_urllib_request
.HTTPCookieProcessor
):
2882 def __init__(self
, cookiejar
=None):
2883 compat_urllib_request
.HTTPCookieProcessor
.__init
__(self
, cookiejar
)
2885 def http_response(self
, request
, response
):
2886 # Python 2 will choke on next HTTP request in row if there are non-ASCII
2887 # characters in Set-Cookie HTTP header of last response (see
2888 # https://github.com/ytdl-org/youtube-dl/issues/6769).
2889 # In order to at least prevent crashing we will percent encode Set-Cookie
2890 # header before HTTPCookieProcessor starts processing it.
2891 # if sys.version_info < (3, 0) and response.headers:
2892 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2893 # set_cookie = response.headers.get(set_cookie_header)
2895 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2896 # if set_cookie != set_cookie_escaped:
2897 # del response.headers[set_cookie_header]
2898 # response.headers[set_cookie_header] = set_cookie_escaped
2899 return compat_urllib_request
.HTTPCookieProcessor
.http_response(self
, request
, response
)
2901 https_request
= compat_urllib_request
.HTTPCookieProcessor
.http_request
2902 https_response
= http_response
2905 class YoutubeDLRedirectHandler(compat_urllib_request
.HTTPRedirectHandler
):
2906 if sys
.version_info
[0] < 3:
2907 def redirect_request(self
, req
, fp
, code
, msg
, headers
, newurl
):
2908 # On python 2 urlh.geturl() may sometimes return redirect URL
2909 # as byte string instead of unicode. This workaround allows
2910 # to force it always return unicode.
2911 return compat_urllib_request
.HTTPRedirectHandler
.redirect_request(self
, req
, fp
, code
, msg
, headers
, compat_str(newurl
))
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (timezone, date_str) where timezone is a datetime.timedelta
    offset (zero for 'Z', absent, or sign-less designators) and date_str
    has the designator removed.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        return datetime.timedelta(), date_str
    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # strptime cannot digest fractional seconds here; strip them first.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        # Unparseable date: mirror the original behavior of returning None
        return None
def date_formats(day_first=True):
    """Pick the strptime format list for day-first vs month-first dates."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Commas would defeat the format strings below
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
def unified_timestamp(date_str, day_first=True):
    """Best-effort conversion of a free-form date string to a UNIX timestamp."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for fmt in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, fmt) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    # Fallback: RFC 2822 style dates
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from *url*; fall back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    if candidate.rstrip('/') in KNOWN_EXTENSIONS:
        return candidate.rstrip('/')
    return default_ext
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename: media name with a '<lang>.<format>' extension."""
    sub_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, sub_ext, expected_real_ext)
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if m is None:
        # Not a compact date: hand it back untouched
        return date_str
    return '-'.join(m.groups())
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            # Open-ended on the left
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            # Open-ended on the right
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        # Python 2 may hand back a byte string; normalize to text
        res = res.decode(preferredencoding())
    assert isinstance(res, compat_str)
    return res
3113 def _windows_write_string(s
, out
):
3114 """ Returns True if the string was written using special methods,
3115 False if it has yet to be written out."""
3116 # Adapted from http://stackoverflow.com/a/3259271/35070
3119 import ctypes
.wintypes
3127 fileno
= out
.fileno()
3128 except AttributeError:
3129 # If the output stream doesn't have a fileno, it's virtual
3131 except io
.UnsupportedOperation
:
3132 # Some strange Windows pseudo files?
3134 if fileno
not in WIN_OUTPUT_IDS
:
3137 GetStdHandle
= compat_ctypes_WINFUNCTYPE(
3138 ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.DWORD
)(
3139 ('GetStdHandle', ctypes
.windll
.kernel32
))
3140 h
= GetStdHandle(WIN_OUTPUT_IDS
[fileno
])
3142 WriteConsoleW
= compat_ctypes_WINFUNCTYPE(
3143 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
, ctypes
.wintypes
.LPWSTR
,
3144 ctypes
.wintypes
.DWORD
, ctypes
.POINTER(ctypes
.wintypes
.DWORD
),
3145 ctypes
.wintypes
.LPVOID
)(('WriteConsoleW', ctypes
.windll
.kernel32
))
3146 written
= ctypes
.wintypes
.DWORD(0)
3148 GetFileType
= compat_ctypes_WINFUNCTYPE(ctypes
.wintypes
.DWORD
, ctypes
.wintypes
.DWORD
)(('GetFileType', ctypes
.windll
.kernel32
))
3149 FILE_TYPE_CHAR
= 0x0002
3150 FILE_TYPE_REMOTE
= 0x8000
3151 GetConsoleMode
= compat_ctypes_WINFUNCTYPE(
3152 ctypes
.wintypes
.BOOL
, ctypes
.wintypes
.HANDLE
,
3153 ctypes
.POINTER(ctypes
.wintypes
.DWORD
))(
3154 ('GetConsoleMode', ctypes
.windll
.kernel32
))
3155 INVALID_HANDLE_VALUE
= ctypes
.wintypes
.DWORD(-1).value
3157 def not_a_console(handle
):
3158 if handle
== INVALID_HANDLE_VALUE
or handle
is None:
3160 return ((GetFileType(handle
) & ~FILE_TYPE_REMOTE
) != FILE_TYPE_CHAR
3161 or GetConsoleMode(handle
, ctypes
.byref(ctypes
.wintypes
.DWORD())) == 0)
3163 if not_a_console(h
):
3166 def next_nonbmp_pos(s
):
3168 return next(i
for i
, c
in enumerate(s
) if ord(c
) > 0xffff)
3169 except StopIteration:
3173 count
= min(next_nonbmp_pos(s
), 1024)
3175 ret
= WriteConsoleW(
3176 h
, s
, count
if count
else 2, ctypes
.byref(written
), None)
3178 raise OSError('Failed to write string')
3179 if not count
: # We just wrote a non-BMP character
3180 assert written
.value
== 2
3183 assert written
.value
> 0
3184 s
= s
[written
.value
:]
3188 def write_string(s
, out
=None, encoding
=None):
3191 assert type(s
) == compat_str
3193 if sys
.platform
== 'win32' and encoding
is None and hasattr(out
, 'fileno'):
3194 if _windows_write_string(s
, out
):
3197 if ('b' in getattr(out
, 'mode', '')
3198 or sys
.version_info
[0] < 3): # Python 2 lies about mode of sys.stderr
3199 byt
= s
.encode(encoding
or preferredencoding(), 'ignore')
3201 elif hasattr(out
, 'buffer'):
3202 enc
= encoding
or getattr(out
, 'encoding', None) or preferredencoding()
3203 byt
= s
.encode(enc
, 'ignore')
3204 out
.buffer.write(byt
)
def bytes_to_intlist(bs):
    """Convert a bytes/str buffer to a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    return [ord(c) for c in bs]
def intlist_to_bytes(xs):
    """Pack a list of integer byte values back into a bytes object."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
3225 # Cross-platform file locking
3226 if sys
.platform
== 'win32':
3227 import ctypes
.wintypes
3230 class OVERLAPPED(ctypes
.Structure
):
3232 ('Internal', ctypes
.wintypes
.LPVOID
),
3233 ('InternalHigh', ctypes
.wintypes
.LPVOID
),
3234 ('Offset', ctypes
.wintypes
.DWORD
),
3235 ('OffsetHigh', ctypes
.wintypes
.DWORD
),
3236 ('hEvent', ctypes
.wintypes
.HANDLE
),
3239 kernel32
= ctypes
.windll
.kernel32
3240 LockFileEx
= kernel32
.LockFileEx
3241 LockFileEx
.argtypes
= [
3242 ctypes
.wintypes
.HANDLE
, # hFile
3243 ctypes
.wintypes
.DWORD
, # dwFlags
3244 ctypes
.wintypes
.DWORD
, # dwReserved
3245 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3246 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3247 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3249 LockFileEx
.restype
= ctypes
.wintypes
.BOOL
3250 UnlockFileEx
= kernel32
.UnlockFileEx
3251 UnlockFileEx
.argtypes
= [
3252 ctypes
.wintypes
.HANDLE
, # hFile
3253 ctypes
.wintypes
.DWORD
, # dwReserved
3254 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockLow
3255 ctypes
.wintypes
.DWORD
, # nNumberOfBytesToLockHigh
3256 ctypes
.POINTER(OVERLAPPED
) # Overlapped
3258 UnlockFileEx
.restype
= ctypes
.wintypes
.BOOL
3259 whole_low
= 0xffffffff
3260 whole_high
= 0x7fffffff
3262 def _lock_file(f
, exclusive
):
3263 overlapped
= OVERLAPPED()
3264 overlapped
.Offset
= 0
3265 overlapped
.OffsetHigh
= 0
3266 overlapped
.hEvent
= 0
3267 f
._lock
_file
_overlapped
_p
= ctypes
.pointer(overlapped
)
3268 handle
= msvcrt
.get_osfhandle(f
.fileno())
3269 if not LockFileEx(handle
, 0x2 if exclusive
else 0x0, 0,
3270 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3271 raise OSError('Locking file failed: %r' % ctypes
.FormatError())
3273 def _unlock_file(f
):
3274 assert f
._lock
_file
_overlapped
_p
3275 handle
= msvcrt
.get_osfhandle(f
.fileno())
3276 if not UnlockFileEx(handle
, 0,
3277 whole_low
, whole_high
, f
._lock
_file
_overlapped
_p
):
3278 raise OSError('Unlocking file failed: %r' % ctypes
.FormatError())
3281 # Some platforms, such as Jython, is missing fcntl
3285 def _lock_file(f
, exclusive
):
3286 fcntl
.flock(f
, fcntl
.LOCK_EX
if exclusive
else fcntl
.LOCK_SH
)
3288 def _unlock_file(f
):
3289 fcntl
.flock(f
, fcntl
.LOCK_UN
)
3291 UNSUPPORTED_MSG
= 'file locking is not supported on this platform'
3293 def _lock_file(f
, exclusive
):
3294 raise IOError(UNSUPPORTED_MSG
)
3296 def _unlock_file(f
):
3297 raise IOError(UNSUPPORTED_MSG
)
3300 class locked_file(object):
3301 def __init__(self
, filename
, mode
, encoding
=None):
3302 assert mode
in ['r', 'a', 'w']
3303 self
.f
= io
.open(filename
, mode
, encoding
=encoding
)
3306 def __enter__(self
):
3307 exclusive
= self
.mode
!= 'r'
3309 _lock_file(self
.f
, exclusive
)
3315 def __exit__(self
, etype
, value
, traceback
):
3317 _unlock_file(self
.f
)
3324 def write(self
, *args
):
3325 return self
.f
.write(*args
)
3327 def read(self
, *args
):
3328 return self
.f
.read(*args
)
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), defaulting to 'utf-8' when unset."""
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
def shell_quote(args):
    """Return *args* joined into a single shell-quoted command string."""
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(compat_shlex_quote(a))
    return ' '.join(quoted_args)
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL
    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
def unsmuggle_url(smug_url, default=None):
    """Split data smuggled by smuggle_url() back out of the URL fragment."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
def format_bytes(bytes):
    """Format a byte count as a human-readable binary-unit string.

    Accepts None (-> 'N/A'), numeric strings, ints and floats.
    The parameter name shadows the builtin but is kept for backward
    compatibility with keyword callers.
    """
    if bytes is None:
        return 'N/A'
    # Fix: use isinstance() instead of `type(bytes) is str` so str
    # subclasses are converted too (exact-type checks are un-idiomatic).
    if isinstance(bytes, str):
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' from *s* using the multipliers in *unit_table*."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    # Accept European-style decimal commas
    num_str = m.group('num').replace(',', '.')
    mult = unit_table[m.group('unit')]
    return int(float(num_str) * mult)
3391 def parse_filesize(s
):
3395 # The lower-case forms are of course incorrect and unofficial,
3396 # but we support those too
3413 'megabytes': 1000 ** 2,
3414 'mebibytes': 1024 ** 2,
3420 'gigabytes': 1000 ** 3,
3421 'gibibytes': 1024 ** 3,
3427 'terabytes': 1000 ** 4,
3428 'tebibytes': 1024 ** 4,
3434 'petabytes': 1000 ** 5,
3435 'pebibytes': 1024 ** 5,
3441 'exabytes': 1000 ** 6,
3442 'exbibytes': 1024 ** 6,
3448 'zettabytes': 1000 ** 7,
3449 'zebibytes': 1024 ** 7,
3455 'yottabytes': 1000 ** 8,
3456 'yobibytes': 1024 ** 8,
3459 return lookup_unit_table(_UNIT_TABLE
, s
)
3468 if re
.match(r
'^[\d,.]+$', s
):
3469 return str_to_int(s
)
3480 return lookup_unit_table(_UNIT_TABLE
, s
)
def parse_resolution(s):
    """Extract width/height from strings like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(mobj.group(1)) * 540}

    return {}
def parse_bitrate(s):
    """Extract an integer kbps figure from strings like '1500 kbps', else None."""
    if not isinstance(s, compat_str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    if mobj:
        return int(mobj.group(1))
    return None
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        return month_names.index(name) + 1
    except ValueError:
        return None
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviation """

    try:
        return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
    except ValueError:
        return None
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' (not already part of an entity) by '&amp;'."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
3542 def setproctitle(title
):
3543 assert isinstance(title
, compat_str
)
3545 # ctypes in Jython is not complete
3546 # http://bugs.jython.org/issue2148
3547 if sys
.platform
.startswith('java'):
3551 libc
= ctypes
.cdll
.LoadLibrary('libc.so.6')
3555 # LoadLibrary in Windows Python 2.7.13 only expects
3556 # a bytestring, but since unicode_literals turns
3557 # every string into a unicode string, it fails.
3559 title_bytes
= title
.encode('utf-8')
3560 buf
= ctypes
.create_string_buffer(len(title_bytes
))
3561 buf
.value
= title_bytes
3563 libc
.prctl(15, buf
, 0, 0, 0)
3564 except AttributeError:
3565 return # Strange libc, just skip this
def remove_start(s, start):
    """Strip *start* from the beginning of *s*; no-op for None or no match."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
def remove_end(s, end):
    """Strip *end* from the end of *s*; no-op for None or no match."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
def remove_quotes(s):
    """Strip one layer of matching single or double quotes from *s*."""
    if s is None or len(s) < 2:
        return s
    for quote in ('"', "'", ):
        if s[0] == quote and s[-1] == quote:
            return s[1:-1]
    return s
def get_domain(url):
    """Extract the bare domain ('www.' stripped) from *url*, or None."""
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    return m.group('domain') if m else None
def url_basename(url):
    """Return the last path component of *url* (no query, no slashes)."""
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').split('/')[-1]
3596 return re
.match(r
'https?://[^?#&]+/', url
).group()
def urljoin(base, path):
    """Join *base* and *path* like urlparse.urljoin, tolerating bytes input.

    Returns None when either part is unusable; absolute/protocol-relative
    paths are returned as-is.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        # Already absolute (or protocol-relative)
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
class HEADRequest(compat_urllib_request.Request):
    """A urllib Request that always issues HTTP HEAD."""

    def get_method(self):
        return 'HEAD'
class PUTRequest(compat_urllib_request.Request):
    """A urllib Request that always issues HTTP PUT."""

    def get_method(self):
        return 'PUT'
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Best-effort int conversion with optional attribute lookup and scaling."""
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
def str_or_none(v, default=None):
    """Coerce *v* to compat_str; return *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    elif isinstance(int_str, compat_str):
        # Drop thousands separators and leading '+'
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
def float_or_none(v, scale=1, invscale=1, default=None):
    """Best-effort float conversion with optional scaling."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
def bool_or_none(v, default=None):
    """Return *v* only when it is a real bool; otherwise *default*."""
    if isinstance(v, bool):
        return v
    return default
def strip_or_none(v, default=None):
    """Return v.strip() for string inputs; *default* for anything else."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
def url_or_none(url):
    """Return *url* stripped if it looks like a supported URL scheme, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
3675 def parse_duration(s
):
3676 if not isinstance(s
, compat_basestring
):
3681 days
, hours
, mins
, secs
, ms
= [None] * 5
3682 m
= re
.match(r
'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s
)
3684 days
, hours
, mins
, secs
, ms
= m
.groups()
3689 [0-9]+\s*y(?:ears?)?\s*
3692 [0-9]+\s*m(?:onths?)?\s*
3695 [0-9]+\s*w(?:eeks?)?\s*
3698 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3702 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3705 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3708 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3711 days
, hours
, mins
, secs
, ms
= m
.groups()
3713 m
= re
.match(r
'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s
)
3715 hours
, mins
= m
.groups()
3721 duration
+= float(secs
)
3723 duration
+= float(mins
) * 60
3725 duration
+= float(hours
) * 60 * 60
3727 duration
+= float(days
) * 24 * 60 * 60
3729 duration
+= float(ms
)
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension of *filename*.

    When *expected_real_ext* is given and does not match, *ext* is
    appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    When *expected_real_ext* is given and does not match, *ext* is
    appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        stem = name
    else:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
3748 def check_executable(exe
, args
=[]):
3749 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3750 args can be a list of arguments for a short output (like -version) """
3752 process_communicate_or_kill(subprocess
.Popen(
3753 [exe
] + args
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
))
3759 def get_exe_version(exe
, args
=['--version'],
3760 version_re
=None, unrecognized
='present'):
3761 """ Returns the version of the specified executable,
3762 or False if the executable is not present """
3764 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3765 # SIGTTOU if youtube-dlc is run in the background.
3766 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3767 out
, _
= process_communicate_or_kill(subprocess
.Popen(
3768 [encodeArgument(exe
)] + args
,
3769 stdin
=subprocess
.PIPE
,
3770 stdout
=subprocess
.PIPE
, stderr
=subprocess
.STDOUT
))
3773 if isinstance(out
, bytes): # Python 2.x
3774 out
= out
.decode('ascii', 'ignore')
3775 return detect_exe_version(out
, version_re
, unrecognized
)
3778 def detect_exe_version(output
, version_re
=None, unrecognized
='present'):
3779 assert isinstance(output
, compat_str
)
3780 if version_re
is None:
3781 version_re
= r
'version\s+([-0-9._a-zA-Z]+)'
3782 m
= re
.search(version_re
, output
)
3789 class PagedList(object):
3791 # This is only useful for tests
3792 return len(self
.getslice())
3795 class OnDemandPagedList(PagedList
):
3796 def __init__(self
, pagefunc
, pagesize
, use_cache
=True):
3797 self
._pagefunc
= pagefunc
3798 self
._pagesize
= pagesize
3799 self
._use
_cache
= use_cache
3803 def getslice(self
, start
=0, end
=None):
3805 for pagenum
in itertools
.count(start
// self
._pagesize
):
3806 firstid
= pagenum
* self
._pagesize
3807 nextfirstid
= pagenum
* self
._pagesize
+ self
._pagesize
3808 if start
>= nextfirstid
:
3813 page_results
= self
._cache
.get(pagenum
)
3814 if page_results
is None:
3815 page_results
= list(self
._pagefunc
(pagenum
))
3817 self
._cache
[pagenum
] = page_results
3820 start
% self
._pagesize
3821 if firstid
<= start
< nextfirstid
3825 ((end
- 1) % self
._pagesize
) + 1
3826 if (end
is not None and firstid
<= end
<= nextfirstid
)
3829 if startv
!= 0 or endv
is not None:
3830 page_results
= page_results
[startv
:endv
]
3831 res
.extend(page_results
)
3833 # A little optimization - if current page is not "full", ie. does
3834 # not contain page_size videos then we can assume that this page
3835 # is the last one - there are no more ids on further pages -
3836 # i.e. no need to query again.
3837 if len(page_results
) + startv
< self
._pagesize
:
3840 # If we got the whole page, but the next page is not interesting,
3841 # break out early as well
3842 if end
== nextfirstid
:
3847 class InAdvancePagedList(PagedList
):
3848 def __init__(self
, pagefunc
, pagecount
, pagesize
):
3849 self
._pagefunc
= pagefunc
3850 self
._pagecount
= pagecount
3851 self
._pagesize
= pagesize
3853 def getslice(self
, start
=0, end
=None):
3855 start_page
= start
// self
._pagesize
3857 self
._pagecount
if end
is None else (end
// self
._pagesize
+ 1))
3858 skip_elems
= start
- start_page
* self
._pagesize
3859 only_more
= None if end
is None else end
- start
3860 for pagenum
in range(start_page
, end_page
):
3861 page
= list(self
._pagefunc
(pagenum
))
3863 page
= page
[skip_elems
:]
3865 if only_more
is not None:
3866 if len(page
) < only_more
:
3867 only_more
-= len(page
)
3869 page
= page
[:only_more
]
def uppercase_escape(s):
    """Decode literal '\\UXXXXXXXX' escape sequences embedded in *s*."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def lowercase_escape(s):
    """Decode literal '\\uXXXX' escape sequences embedded in *s*."""
    unicode_escape = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: unicode_escape(m.group(0))[0],
        s)
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        # Python 2 quote() chokes on unicode input
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        # IDNA-encode the host, percent-escape everything else
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
):
3913 if not isinstance(url
, compat_str
):
3914 url
= url
.decode('utf-8', 'replace')
3915 BOM_UTF8
= ('\xef\xbb\xbf', '\ufeff')
3916 for bom
in BOM_UTF8
:
3917 if url
.startswith(bom
):
3918 url
= url
[len(bom
):]
3920 if not url
or url
.startswith(('#', ';', ']')):
3922 # "#" cannot be stripped out since it is part of the URI
3923 # However, it can be safely stipped out if follwing a whitespace
3924 return re
.split(r
'\s#', url
, 1)[0].rstrip()
3926 with contextlib
.closing(batch_fd
) as fd
:
3927 return [url
for url
in map(fixup
, fd
) if url
]
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
def update_url_query(url, query):
    """Merge the *query* mapping into *url*'s query string and rebuild it."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    qs = compat_parse_qs(parsed_url.query)
    qs.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(qs, True)))
3944 def update_Request(req
, url
=None, data
=None, headers
={}, query={}
):
3945 req_headers
= req
.headers
.copy()
3946 req_headers
.update(headers
)
3947 req_data
= data
or req
.data
3948 req_url
= update_url_query(url
or req
.get_full_url(), query
)
3949 req_get_method
= req
.get_method()
3950 if req_get_method
== 'HEAD':
3951 req_type
= HEADRequest
3952 elif req_get_method
== 'PUT':
3953 req_type
= PUTRequest
3955 req_type
= compat_urllib_request
.Request
3957 req_url
, data
=req_data
, headers
=req_headers
,
3958 origin_req_host
=req
.origin_req_host
, unverifiable
=req
.unverifiable
)
3959 if hasattr(req
, 'timeout'):
3960 new_req
.timeout
= req
.timeout
3964 def _multipart_encode_impl(data
, boundary
):
3965 content_type
= 'multipart/form-data; boundary=%s' % boundary
3968 for k
, v
in data
.items():
3969 out
+= b
'--' + boundary
.encode('ascii') + b
'\r\n'
3970 if isinstance(k
, compat_str
):
3971 k
= k
.encode('utf-8')
3972 if isinstance(v
, compat_str
):
3973 v
= v
.encode('utf-8')
3974 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3975 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3976 content
= b
'Content-Disposition: form-data; name="' + k
+ b
'"\r\n\r\n' + v
+ b
'\r\n'
3977 if boundary
.encode('ascii') in content
:
3978 raise ValueError('Boundary overlaps with data')
3981 out
+= b
'--' + boundary
.encode('ascii') + b
'--\r\n'
3983 return out
, content_type
3986 def multipart_encode(data
, boundary
=None):
3988 Encode a dict to RFC 7578-compliant form-data
3991 A dict where keys and values can be either Unicode or bytes-like
3994 If specified a Unicode object, it's used as the boundary. Otherwise
3995 a random boundary is generated.
3997 Reference: https://tools.ietf.org/html/rfc7578
3999 has_specified_boundary
= boundary
is not None
4002 if boundary
is None:
4003 boundary
= '---------------' + str(random
.randrange(0x0fffffff, 0xffffffff))
4006 out
, content_type
= _multipart_encode_impl(data
, boundary
)
4009 if has_specified_boundary
:
4013 return out
, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key (or the first usable of several keys) in *d*.

    With *skip_false_values* (the default), falsy values are skipped
    just like missing/None ones.
    """
    if isinstance(key_or_keys, (list, tuple)):
        for key in key_or_keys:
            if key not in d or d[key] is None or skip_false_values and not d[key]:
                continue
            return d[key]
        return default
    return d.get(key_or_keys, default)
def try_get(src, getter, expected_type=None):
    """Apply getter callables to *src*, returning the first value that
    succeeds (and matches *expected_type* when given); else None."""
    if not isinstance(getter, (list, tuple)):
        getter = [getter]
    for get in getter:
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            pass
        else:
            if expected_type is None or isinstance(v, expected_type):
                return v
def merge_dicts(*dicts):
    """Merge dicts left-to-right, skipping None values; a later non-empty
    string may replace an earlier empty-string value for the same key."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if (k not in merged
                    or (isinstance(v, compat_str) and v
                        and isinstance(merged[k], compat_str)
                        and not merged[k])):
                merged[k] = v
    return merged
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Coerce *string* to compat_str, decoding bytes with *encoding*.

    NOTE: the encoding default is evaluated once at import time (kept
    from the original definition).
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4066 TV_PARENTAL_GUIDELINES
= {
4076 def parse_age_limit(s
):
4078 return s
if 0 <= s
<= 21 else None
4079 if not isinstance(s
, compat_basestring
):
4081 m
= re
.match(r
'^(?P<age>\d{1,2})\+?$', s
)
4083 return int(m
.group('age'))
4085 return US_RATINGS
[s
]
4086 m
= re
.match(r
'^TV[_-]?(%s)$' % '|'.join(k
[3:] for k
in TV_PARENTAL_GUIDELINES
), s
)
4088 return TV_PARENTAL_GUIDELINES
['TV-' + m
.group(1)]
def strip_jsonp(code):
    """Strip a JSONP wrapper (callback invocation) and return the payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4102 def js_to_json(code
, vars={}):
4103 # vars is a dict of var, val pairs to substitute
4104 COMMENT_RE
= r
'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4105 SKIP_RE
= r
'\s*(?:{comment})?\s*'.format(comment
=COMMENT_RE
)
4107 (r
'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip
=SKIP_RE
), 16),
4108 (r
'(?s)^(0+[0-7]+){skip}:?$'.format(skip
=SKIP_RE
), 8),
4113 if v
in ('true', 'false', 'null'):
4115 elif v
.startswith('/*') or v
.startswith('//') or v
.startswith('!') or v
== ',':
4118 if v
[0] in ("'", '"'):
4119 v
= re
.sub(r
'(?s)\\.|"', lambda m
: {
4124 }.get(m
.group(0), m
.group(0)), v
[1:-1])
4126 for regex
, base
in INTEGER_TABLE
:
4127 im
= re
.match(regex
, v
)
4129 i
= int(im
.group(1), base
)
4130 return '"%d":' % i
if v
.endswith(':') else '%d' % i
4137 return re
.sub(r
'''(?sx)
4138 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4139 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4140 {comment}|,(?={skip}[\]}}])|
4141 (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4142 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4145 '''.format(comment
=COMMENT_RE
, skip
=SKIP_RE
), fix_kv
, code
)
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            # Unknown qualities sort below every known one
            return -1
    return q
4158 DEFAULT_OUTTMPL
= '%(title)s [%(id)s].%(ext)s'
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        return s[:length - len(ELLIPSES)] + ELLIPSES
    return s
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(int(part) for part in re.split(r'[-.]', v))
def is_outdated_version(version, limit, assume_new=True):
    """Compare *version* against *limit*; unparsable input falls back to
    the *assume_new* presumption."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U """
    from zipimport import zipimporter

    # Updateable when running from a zip bundle or a frozen executable
    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
def error_to_compat_str(err):
    """Stringify an exception, decoding byte messages on Python 2."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
def mimetype2ext(mt):
    """Map a MIME type to a file extension; falls back to the subtype itself."""
    if mt is None:
        return None

    # Full-type special cases first.
    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mt)
    if ext is not None:
        return ext

    # Otherwise key on the (parameter-stripped, lowercased) subtype.
    _, _, res = mt.rpartition('/')
    res = res.split(';')[0].strip().lower()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
def parse_codecs(codecs_str):
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        # The first dotted component identifies the codec family.
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Neither codec recognised: with exactly two entries, assume the
        # conventional "video, audio" ordering.
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
def urlhandle_detect_ext(url_handle):
    """Guess a file extension for a response: Content-Disposition filename
    first, then the Content-Type header."""
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    return mimetype2ext(getheader('Content-Type'))
def encode_data_uri(data, mime_type):
    """Build a base64 `data:` URI from raw bytes and a MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:  # No limit set
        return False
    if content_limit is None:
        return False  # Content available for everyone
    # Block when the viewer's allowed age is below the content's rating.
    return age_limit < content_limit
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Longer BOMs are listed before their prefixes (utf-32-le starts with
    # the utf-16-le BOM) so the first match is the right one.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    text = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            text = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if text is None:
        # No BOM: assume UTF-8; 'replace' keeps undecodable input harmless.
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
def determine_protocol(info_dict):
    """Derive the download protocol for a format dict.

    An explicit 'protocol' entry wins; otherwise the URL prefix, then the
    file extension (m3u8/f4m), then the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    if url.startswith('rtmp'):
        return 'rtmp'
    elif url.startswith('mms'):
        return 'mms'
    elif url.startswith('rtsp'):
        return 'rtsp'

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    elif ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def get_max_lens(table):
        # Widest cell per column across the whole table.
        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        # Keep only cells whose column flag is truthy.
        return [col for (take, col) in zip(filterArray, row) if take]

    if hideEmpty:
        # Drop columns that are empty in every data row.
        max_lens = get_max_lens(data)
        header_row = filter_using_list(header_row, max_lens)
        data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    if delim:
        # Insert a dashed separator row between header and data.
        table = [header_row] + [['-' * ml for ml in max_lens]] + data
    format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
4369 def _match_one(filter_part
, dct
):
4370 COMPARISON_OPERATORS
= {
4378 operator_rex
= re
.compile(r
'''(?x)\s*
4380 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4382 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4383 (?P<quote>["\'])(?P
<quotedstrval
>(?
:\\.|
(?
!(?P
=quote
)|
\\).)+?
)(?P
=quote
)|
4384 (?P
<strval
>(?
![0-9.])[a
-z0
-9A
-Z
]*)
4387 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4388 m = operator_rex.search(filter_part)
4390 op = COMPARISON_OPERATORS[m.group('op')]
4391 actual_value = dct.get(m.group('key'))
4392 if (m.group('quotedstrval') is not None
4393 or m.group('strval') is not None
4394 # If the original field is a string and matching comparisonvalue is
4395 # a number we should respect the origin of the original field
4396 # and process comparison value as a string (see
4397 # https://github.com/ytdl-org/youtube-dl/issues/11082).
4398 or actual_value is not None and m.group('intval') is not None
4399 and isinstance(actual_value, compat_str)):
4400 if m.group('op') not in ('=', '!='):
4402 'Operator %s does not support string values!' % m.group('op'))
4403 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4404 quote = m.group('quote')
4405 if quote is not None:
4406 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4409 comparison_value = int(m.group('intval'))
4411 comparison_value = parse_filesize(m.group('intval'))
4412 if comparison_value is None:
4413 comparison_value = parse_filesize(m.group('intval') + 'B')
4414 if comparison_value is None:
4416 'Invalid integer value %r in filter part %r' % (
4417 m.group('intval'), filter_part))
4418 if actual_value is None:
4419 return m.group('none_inclusive')
4420 return op(actual_value, comparison_value)
4423 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4424 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4426 operator_rex = re.compile(r'''(?x
)\s
*
4427 (?P
<op
>%s)\s
*(?P
<key
>[a
-z_
]+)
4429 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4430 m = operator_rex.search(filter_part)
4432 op = UNARY_OPERATORS[m.group('op')]
4433 actual_value = dct.get(m.group('key'))
4434 return op(actual_value)
4436 raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    # '&' joins sub-filters; every one of them must pass.
    return all(
        _match_one(part, dct) for part in filter_str.split('&'))
def match_filter_func(filter_str):
    """Build a --match-filter callback: None when the video passes,
    otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression to seconds (float), or None."""
    if not time_expr:
        return

    # Plain offset, optionally suffixed with 's': "12.3" / "12.3s"
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock format HH:MM:SS[.fff] — some streams use ':' for the fraction.
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
def srt_subtitles_timecode(seconds):
    """Format a second count as an SRT timecode: HH:MM:SS,mmm."""
    hrs = seconds / 3600
    mins = (seconds % 3600) / 60
    secs = seconds % 60
    msecs = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hrs, mins, secs, msecs)
4473 def dfxp2srt(dfxp_data):
4475 @param dfxp_data A
bytes-like
object containing DFXP data
4476 @returns A
unicode object containing converted SRT data
4478 LEGACY_NAMESPACES = (
4479 (b'http://www.w3.org/ns/ttml', [
4480 b'http://www.w3.org/2004/11/ttaf1',
4481 b'http://www.w3.org/2006/04/ttaf1',
4482 b'http://www.w3.org/2006/10/ttaf1',
4484 (b'http://www.w3.org/ns/ttml#styling', [
4485 b'http://www.w3.org/ns/ttml#style',
4489 SUPPORTED_STYLING = [
4498 _x = functools.partial(xpath_with_ns, ns_map={
4499 'xml': 'http://www.w3.org/XML/1998/namespace',
4500 'ttml': 'http://www.w3.org/ns/ttml',
4501 'tts': 'http://www.w3.org/ns/ttml#styling',
4507 class TTMLPElementParser(object):
4509 _unclosed_elements = []
4510 _applied_styles = []
4512 def start(self, tag, attrib):
4513 if tag in (_x('ttml:br'), 'br'):
4516 unclosed_elements = []
4518 element_style_id = attrib.get('style')
4520 style.update(default_style)
4521 if element_style_id:
4522 style.update(styles.get(element_style_id, {}))
4523 for prop in SUPPORTED_STYLING:
4524 prop_val = attrib.get(_x('tts:' + prop))
4526 style[prop] = prop_val
4529 for k, v in sorted(style.items()):
4530 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4533 font += ' color="%s"' % v
4534 elif k == 'fontSize':
4535 font += ' size="%s"' % v
4536 elif k == 'fontFamily':
4537 font += ' face="%s"' % v
4538 elif k == 'fontWeight' and v == 'bold':
4540 unclosed_elements.append('b')
4541 elif k == 'fontStyle' and v == 'italic':
4543 unclosed_elements.append('i')
4544 elif k == 'textDecoration' and v == 'underline':
4546 unclosed_elements.append('u')
4548 self._out += '<font' + font + '>'
4549 unclosed_elements.append('font')
4551 if self._applied_styles:
4552 applied_style.update(self._applied_styles[-1])
4553 applied_style.update(style)
4554 self._applied_styles.append(applied_style)
4555 self._unclosed_elements.append(unclosed_elements)
4558 if tag not in (_x('ttml:br'), 'br'):
4559 unclosed_elements = self._unclosed_elements.pop()
4560 for element in reversed(unclosed_elements):
4561 self._out += '</%s>' % element
4562 if unclosed_elements and self._applied_styles:
4563 self._applied_styles.pop()
4565 def data(self, data):
4569 return self._out.strip()
4571 def parse_node(node):
4572 target = TTMLPElementParser()
4573 parser = xml.etree.ElementTree.XMLParser(target=target)
4574 parser.feed(xml.etree.ElementTree.tostring(node))
4575 return parser.close()
4577 for k, v in LEGACY_NAMESPACES:
4579 dfxp_data = dfxp_data.replace(ns, k)
4581 dfxp = compat_etree_fromstring(dfxp_data)
4583 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
4586 raise ValueError('Invalid dfxp/TTML subtitle')
4590 for style in dfxp.findall(_x('.//ttml:style')):
4591 style_id = style.get('id') or style.get(_x('xml:id'))
4594 parent_style_id = style.get('style')
4596 if parent_style_id not in styles:
4599 styles[style_id] = styles[parent_style_id].copy()
4600 for prop in SUPPORTED_STYLING:
4601 prop_val = style.get(_x('tts:' + prop))
4603 styles.setdefault(style_id, {})[prop] = prop_val
4609 for p in ('body', 'div'):
4610 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4613 style = styles.get(ele.get('style'))
4616 default_style.update(style)
4618 for para, index in zip(paras, itertools.count(1)):
4619 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
4620 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
4621 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4622 if begin_time is None:
4627 end_time = begin_time + dur
4628 out.append('%d\n%s --> %s\n%s\n\n' % (
4630 srt_subtitles_timecode(begin_time),
4631 srt_subtitles_timecode(end_time),
def cli_option(params, command_option, param):
    """Return [option, value] for a set string option, else []."""
    value = params.get(param)
    if value:
        value = compat_str(value)
    if value is None:
        return []
    return [command_option, value]
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean option; [] when unset.  With `separator` the value is
    glued onto the option ('--opt=true'), otherwise passed as a second item."""
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    chosen = true_value if param else false_value
    if separator:
        return [command_option + separator + chosen]
    return [command_option, chosen]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit a bare flag when params[param] equals the expected value."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
def cli_configuration_args(params, arg_name, key, default=[], exe=None):  # returns arg, for_compat
    """Look up external-downloader style arguments.

    `argdict` may be a plain list (legacy config -> for_compat=True) or a dict
    keyed by downloader name, 'key+exe', or 'default'.
    """
    argdict = params.get(arg_name, {})
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        return argdict, True

    if argdict is None:
        return default, False
    assert isinstance(argdict, dict)

    assert isinstance(key, compat_str)
    key = key.lower()

    args = exe_args = None
    if exe is not None:
        assert isinstance(exe, compat_str)
        exe = exe.lower()
        # Most specific first: 'key+exe', then the bare exe name.
        args = argdict.get('%s+%s' % (key, exe))
        if args is None:
            exe_args = argdict.get(exe)

    if args is None:
        args = argdict.get(key) if key != exe else None
    if args is None and exe_args is None:
        args = argdict.get('default', default)

    args, exe_args = args or [], exe_args or []
    assert isinstance(args, (list, tuple))
    assert isinstance(exe_args, (list, tuple))
    return args + exe_args, False
4690 class ISO639Utils(object):
4691 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4750 'iw': 'heb', # Replaced by he in 1989 revision
4760 'in': 'ind', # Replaced by id in 1989 revision
4875 'ji': 'yid', # Replaced by yi in 1989 revision
4883 def short2long(cls, code):
4884 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4885 return cls._lang_map.get(code[:2])
4888 def long2short(cls, code):
4889 """Convert language code from ISO 639-2/T to ISO 639-1"""
4890 for short_name, long_name in cls._lang_map.items():
4891 if long_name == code:
4895 class ISO3166Utils(object):
4896 # From http://data.okfn.org/data/core/country-list
4898 'AF': 'Afghanistan',
4899 'AX': 'Åland Islands',
4902 'AS': 'American Samoa',
4907 'AG': 'Antigua and Barbuda',
4924 'BO': 'Bolivia, Plurinational State of',
4925 'BQ': 'Bonaire, Sint Eustatius and Saba',
4926 'BA': 'Bosnia and Herzegovina',
4928 'BV': 'Bouvet Island',
4930 'IO': 'British Indian Ocean Territory',
4931 'BN': 'Brunei Darussalam',
4933 'BF': 'Burkina Faso',
4939 'KY': 'Cayman Islands',
4940 'CF': 'Central African Republic',
4944 'CX': 'Christmas Island',
4945 'CC': 'Cocos (Keeling) Islands',
4949 'CD': 'Congo, the Democratic Republic of the',
4950 'CK': 'Cook Islands',
4952 'CI': 'Côte d\'Ivoire',
4957 'CZ': 'Czech Republic',
4961 'DO': 'Dominican Republic',
4964 'SV': 'El Salvador',
4965 'GQ': 'Equatorial Guinea',
4969 'FK': 'Falkland Islands (Malvinas)',
4970 'FO': 'Faroe Islands',
4974 'GF': 'French Guiana',
4975 'PF': 'French Polynesia',
4976 'TF': 'French Southern Territories',
4991 'GW': 'Guinea-Bissau',
4994 'HM': 'Heard Island and McDonald Islands',
4995 'VA': 'Holy See (Vatican City State)',
5002 'IR': 'Iran, Islamic Republic of',
5005 'IM': 'Isle of Man',
5015 'KP': 'Korea, Democratic People\'s Republic of',
5016 'KR': 'Korea, Republic of',
5019 'LA': 'Lao People\'s Democratic Republic',
5025 'LI': 'Liechtenstein',
5029 'MK': 'Macedonia, the Former Yugoslav Republic of',
5036 'MH': 'Marshall Islands',
5042 'FM': 'Micronesia, Federated States of',
5043 'MD': 'Moldova, Republic of',
5054 'NL': 'Netherlands',
5055 'NC': 'New Caledonia',
5056 'NZ': 'New Zealand',
5061 'NF': 'Norfolk Island',
5062 'MP': 'Northern Mariana Islands',
5067 'PS': 'Palestine, State of',
5069 'PG': 'Papua New Guinea',
5072 'PH': 'Philippines',
5076 'PR': 'Puerto Rico',
5080 'RU': 'Russian Federation',
5082 'BL': 'Saint Barthélemy',
5083 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5084 'KN': 'Saint Kitts and Nevis',
5085 'LC': 'Saint Lucia',
5086 'MF': 'Saint Martin (French part)',
5087 'PM': 'Saint Pierre and Miquelon',
5088 'VC': 'Saint Vincent and the Grenadines',
5091 'ST': 'Sao Tome and Principe',
5092 'SA': 'Saudi Arabia',
5096 'SL': 'Sierra Leone',
5098 'SX': 'Sint Maarten (Dutch part)',
5101 'SB': 'Solomon Islands',
5103 'ZA': 'South Africa',
5104 'GS': 'South Georgia and the South Sandwich Islands',
5105 'SS': 'South Sudan',
5110 'SJ': 'Svalbard and Jan Mayen',
5113 'CH': 'Switzerland',
5114 'SY': 'Syrian Arab Republic',
5115 'TW': 'Taiwan, Province of China',
5117 'TZ': 'Tanzania, United Republic of',
5119 'TL': 'Timor-Leste',
5123 'TT': 'Trinidad and Tobago',
5126 'TM': 'Turkmenistan',
5127 'TC': 'Turks and Caicos Islands',
5131 'AE': 'United Arab Emirates',
5132 'GB': 'United Kingdom',
5133 'US': 'United States',
5134 'UM': 'United States Minor Outlying Islands',
5138 'VE': 'Venezuela, Bolivarian Republic of',
5140 'VG': 'Virgin Islands, British',
5141 'VI': 'Virgin Islands, U.S.',
5142 'WF': 'Wallis and Futuna',
5143 'EH': 'Western Sahara',
5150 def short2full(cls, code):
5151 """Convert an ISO 3166-2 country code to the corresponding full name"""
5152 return cls._country_map.get(code.upper())
5155 class GeoUtils(object):
5156 # Major IPv4 address blocks per country
5158 'AD': '46.172.224.0/19',
5159 'AE': '94.200.0.0/13',
5160 'AF': '149.54.0.0/17',
5161 'AG': '209.59.64.0/18',
5162 'AI': '204.14.248.0/21',
5163 'AL': '46.99.0.0/16',
5164 'AM': '46.70.0.0/15',
5165 'AO': '105.168.0.0/13',
5166 'AP': '182.50.184.0/21',
5167 'AQ': '23.154.160.0/24',
5168 'AR': '181.0.0.0/12',
5169 'AS': '202.70.112.0/20',
5170 'AT': '77.116.0.0/14',
5171 'AU': '1.128.0.0/11',
5172 'AW': '181.41.0.0/18',
5173 'AX': '185.217.4.0/22',
5174 'AZ': '5.197.0.0/16',
5175 'BA': '31.176.128.0/17',
5176 'BB': '65.48.128.0/17',
5177 'BD': '114.130.0.0/16',
5179 'BF': '102.178.0.0/15',
5180 'BG': '95.42.0.0/15',
5181 'BH': '37.131.0.0/17',
5182 'BI': '154.117.192.0/18',
5183 'BJ': '137.255.0.0/16',
5184 'BL': '185.212.72.0/23',
5185 'BM': '196.12.64.0/18',
5186 'BN': '156.31.0.0/16',
5187 'BO': '161.56.0.0/16',
5188 'BQ': '161.0.80.0/20',
5189 'BR': '191.128.0.0/12',
5190 'BS': '24.51.64.0/18',
5191 'BT': '119.2.96.0/19',
5192 'BW': '168.167.0.0/16',
5193 'BY': '178.120.0.0/13',
5194 'BZ': '179.42.192.0/18',
5195 'CA': '99.224.0.0/11',
5196 'CD': '41.243.0.0/16',
5197 'CF': '197.242.176.0/21',
5198 'CG': '160.113.0.0/16',
5199 'CH': '85.0.0.0/13',
5200 'CI': '102.136.0.0/14',
5201 'CK': '202.65.32.0/19',
5202 'CL': '152.172.0.0/14',
5203 'CM': '102.244.0.0/14',
5204 'CN': '36.128.0.0/10',
5205 'CO': '181.240.0.0/12',
5206 'CR': '201.192.0.0/12',
5207 'CU': '152.206.0.0/15',
5208 'CV': '165.90.96.0/19',
5209 'CW': '190.88.128.0/17',
5210 'CY': '31.153.0.0/16',
5211 'CZ': '88.100.0.0/14',
5213 'DJ': '197.241.0.0/17',
5214 'DK': '87.48.0.0/12',
5215 'DM': '192.243.48.0/20',
5216 'DO': '152.166.0.0/15',
5217 'DZ': '41.96.0.0/12',
5218 'EC': '186.68.0.0/15',
5219 'EE': '90.190.0.0/15',
5220 'EG': '156.160.0.0/11',
5221 'ER': '196.200.96.0/20',
5222 'ES': '88.0.0.0/11',
5223 'ET': '196.188.0.0/14',
5224 'EU': '2.16.0.0/13',
5225 'FI': '91.152.0.0/13',
5226 'FJ': '144.120.0.0/16',
5227 'FK': '80.73.208.0/21',
5228 'FM': '119.252.112.0/20',
5229 'FO': '88.85.32.0/19',
5231 'GA': '41.158.0.0/15',
5233 'GD': '74.122.88.0/21',
5234 'GE': '31.146.0.0/16',
5235 'GF': '161.22.64.0/18',
5236 'GG': '62.68.160.0/19',
5237 'GH': '154.160.0.0/12',
5238 'GI': '95.164.0.0/16',
5239 'GL': '88.83.0.0/19',
5240 'GM': '160.182.0.0/15',
5241 'GN': '197.149.192.0/18',
5242 'GP': '104.250.0.0/19',
5243 'GQ': '105.235.224.0/20',
5244 'GR': '94.64.0.0/13',
5245 'GT': '168.234.0.0/16',
5246 'GU': '168.123.0.0/16',
5247 'GW': '197.214.80.0/20',
5248 'GY': '181.41.64.0/18',
5249 'HK': '113.252.0.0/14',
5250 'HN': '181.210.0.0/16',
5251 'HR': '93.136.0.0/13',
5252 'HT': '148.102.128.0/17',
5253 'HU': '84.0.0.0/14',
5254 'ID': '39.192.0.0/10',
5255 'IE': '87.32.0.0/12',
5256 'IL': '79.176.0.0/13',
5257 'IM': '5.62.80.0/20',
5258 'IN': '117.192.0.0/10',
5259 'IO': '203.83.48.0/21',
5260 'IQ': '37.236.0.0/14',
5261 'IR': '2.176.0.0/12',
5262 'IS': '82.221.0.0/16',
5263 'IT': '79.0.0.0/10',
5264 'JE': '87.244.64.0/18',
5265 'JM': '72.27.0.0/17',
5266 'JO': '176.29.0.0/16',
5267 'JP': '133.0.0.0/8',
5268 'KE': '105.48.0.0/12',
5269 'KG': '158.181.128.0/17',
5270 'KH': '36.37.128.0/17',
5271 'KI': '103.25.140.0/22',
5272 'KM': '197.255.224.0/20',
5273 'KN': '198.167.192.0/19',
5274 'KP': '175.45.176.0/22',
5275 'KR': '175.192.0.0/10',
5276 'KW': '37.36.0.0/14',
5277 'KY': '64.96.0.0/15',
5278 'KZ': '2.72.0.0/13',
5279 'LA': '115.84.64.0/18',
5280 'LB': '178.135.0.0/16',
5281 'LC': '24.92.144.0/20',
5282 'LI': '82.117.0.0/19',
5283 'LK': '112.134.0.0/15',
5284 'LR': '102.183.0.0/16',
5285 'LS': '129.232.0.0/17',
5286 'LT': '78.56.0.0/13',
5287 'LU': '188.42.0.0/16',
5288 'LV': '46.109.0.0/16',
5289 'LY': '41.252.0.0/14',
5290 'MA': '105.128.0.0/11',
5291 'MC': '88.209.64.0/18',
5292 'MD': '37.246.0.0/16',
5293 'ME': '178.175.0.0/17',
5294 'MF': '74.112.232.0/21',
5295 'MG': '154.126.0.0/17',
5296 'MH': '117.103.88.0/21',
5297 'MK': '77.28.0.0/15',
5298 'ML': '154.118.128.0/18',
5299 'MM': '37.111.0.0/17',
5300 'MN': '49.0.128.0/17',
5301 'MO': '60.246.0.0/16',
5302 'MP': '202.88.64.0/20',
5303 'MQ': '109.203.224.0/19',
5304 'MR': '41.188.64.0/18',
5305 'MS': '208.90.112.0/22',
5306 'MT': '46.11.0.0/16',
5307 'MU': '105.16.0.0/12',
5308 'MV': '27.114.128.0/18',
5309 'MW': '102.70.0.0/15',
5310 'MX': '187.192.0.0/11',
5311 'MY': '175.136.0.0/13',
5312 'MZ': '197.218.0.0/15',
5313 'NA': '41.182.0.0/16',
5314 'NC': '101.101.0.0/18',
5315 'NE': '197.214.0.0/18',
5316 'NF': '203.17.240.0/22',
5317 'NG': '105.112.0.0/12',
5318 'NI': '186.76.0.0/15',
5319 'NL': '145.96.0.0/11',
5320 'NO': '84.208.0.0/13',
5321 'NP': '36.252.0.0/15',
5322 'NR': '203.98.224.0/19',
5323 'NU': '49.156.48.0/22',
5324 'NZ': '49.224.0.0/14',
5325 'OM': '5.36.0.0/15',
5326 'PA': '186.72.0.0/15',
5327 'PE': '186.160.0.0/14',
5328 'PF': '123.50.64.0/18',
5329 'PG': '124.240.192.0/19',
5330 'PH': '49.144.0.0/13',
5331 'PK': '39.32.0.0/11',
5332 'PL': '83.0.0.0/11',
5333 'PM': '70.36.0.0/20',
5334 'PR': '66.50.0.0/16',
5335 'PS': '188.161.0.0/16',
5336 'PT': '85.240.0.0/13',
5337 'PW': '202.124.224.0/20',
5338 'PY': '181.120.0.0/14',
5339 'QA': '37.210.0.0/15',
5340 'RE': '102.35.0.0/16',
5341 'RO': '79.112.0.0/13',
5342 'RS': '93.86.0.0/15',
5343 'RU': '5.136.0.0/13',
5344 'RW': '41.186.0.0/16',
5345 'SA': '188.48.0.0/13',
5346 'SB': '202.1.160.0/19',
5347 'SC': '154.192.0.0/11',
5348 'SD': '102.120.0.0/13',
5349 'SE': '78.64.0.0/12',
5350 'SG': '8.128.0.0/10',
5351 'SI': '188.196.0.0/14',
5352 'SK': '78.98.0.0/15',
5353 'SL': '102.143.0.0/17',
5354 'SM': '89.186.32.0/19',
5355 'SN': '41.82.0.0/15',
5356 'SO': '154.115.192.0/18',
5357 'SR': '186.179.128.0/17',
5358 'SS': '105.235.208.0/21',
5359 'ST': '197.159.160.0/19',
5360 'SV': '168.243.0.0/16',
5361 'SX': '190.102.0.0/20',
5363 'SZ': '41.84.224.0/19',
5364 'TC': '65.255.48.0/20',
5365 'TD': '154.68.128.0/19',
5366 'TG': '196.168.0.0/14',
5367 'TH': '171.96.0.0/13',
5368 'TJ': '85.9.128.0/18',
5369 'TK': '27.96.24.0/21',
5370 'TL': '180.189.160.0/20',
5371 'TM': '95.85.96.0/19',
5372 'TN': '197.0.0.0/11',
5373 'TO': '175.176.144.0/21',
5374 'TR': '78.160.0.0/11',
5375 'TT': '186.44.0.0/15',
5376 'TV': '202.2.96.0/19',
5377 'TW': '120.96.0.0/11',
5378 'TZ': '156.156.0.0/14',
5379 'UA': '37.52.0.0/14',
5380 'UG': '102.80.0.0/13',
5382 'UY': '167.56.0.0/13',
5383 'UZ': '84.54.64.0/18',
5384 'VA': '212.77.0.0/19',
5385 'VC': '207.191.240.0/21',
5386 'VE': '186.88.0.0/13',
5387 'VG': '66.81.192.0/20',
5388 'VI': '146.226.0.0/16',
5389 'VN': '14.160.0.0/11',
5390 'VU': '202.80.32.0/20',
5391 'WF': '117.20.32.0/21',
5392 'WS': '202.4.32.0/19',
5393 'YE': '134.35.0.0/16',
5394 'YT': '41.242.116.0/22',
5395 'ZA': '41.0.0.0/11',
5396 'ZM': '102.144.0.0/13',
5397 'ZW': '102.177.192.0/18',
5401 def random_ipv4(cls, code_or_block):
5402 if len(code_or_block) == 2:
5403 block = cls._country_ip_map.get(code_or_block.upper())
5407 block = code_or_block
5408 addr, preflen = block.split('/')
5409 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5410 addr_max = addr_min | (0xffffffff >> int(preflen))
5411 return compat_str(socket.inet_ntoa(
5412 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler honouring a per-request 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dlc's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5440 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5441 # released into Public Domain
5442 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # after much testing, this algorithm was deemed to be the fastest
    s = b''
    n = int(n)
    while n > 0:
        s = compat_struct_pack('>I', n & 0xffffffff) + s
        n = n >> 32
    # strip off leading zeros
    for i in range(len(s)):
        if s[i] != b'\000'[0]:
            break
    else:
        # only happens when n == 0
        s = b'\000'
        i = 0
    s = s[i:]
    # add back some pad bytes. this could be done more efficiently w.r.t. the
    # de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    length = len(s)
    if length % 4:
        # Front-pad to a multiple of 4 so the 32-bit unpack loop fits exactly.
        extra = (4 - length % 4)
        s = b'\000' * extra + s
        length = length + extra
    for i in range(0, length, 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
    return acc
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input: data: data to encrypt, bytes-like object
           exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The scheme interprets the data little-endian, hence the reversal.
    payload = int(binascii.hexlify(data[::-1]), 16)
    encrypted = pow(payload, exponent, modulus)
    return '%x' % encrypted
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data     input data
    @param {int} length     target length
    @returns {int[]}        padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Block layout: 0x00 0x02 <non-zero random filler> 0x00 <data>
    pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
def encode_base_n(num, n, table=None):
    """Render a non-negative integer in base `n` using `table` as digits
    (defaults to 0-9a-zA-Z)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    ret = ''
    while num:
        ret = table[num % n] + ret
        num = num // n
    return ret
def decode_packed_codes(code):
    """Unpack Dean Edwards style p.a.c.k.e.r. obfuscated JavaScript."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    while count:
        count -= 1
        # Each symbol index, rendered in the pack's base, is the token that
        # appears in the obfuscated body.
        base_n_count = encode_base_n(count, base)
        symbol_table[base_n_count] = symbols[count] or base_n_count

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
def caesar(s, alphabet, shift):
    """Shift each character of `s` that occurs in `alphabet` by `shift`
    positions (wrapping); other characters pass through unchanged."""
    if shift == 0:
        return s
    l = len(alphabet)
    return ''.join(
        alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
        for c in s)
def rot47(s):
    """Apply the ROT47 cipher (Caesar shift of 47 over printable ASCII 33-126)."""
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=val,KEY2="v,2"') into a dict,
    stripping surrounding double quotes from quoted values."""
    info = {}
    for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        if val.startswith('"'):
            val = val[1:-1]
        info[key] = val
    return info
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's >>> operator)."""
    if val >= 0:
        return val >> n
    # Map the negative value into unsigned 32-bit space before shifting.
    return (val + 0x100000000) >> n
5585 # Based on png2str() written by @gdkchan and improved by @yokrysty
5586 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5587 def decode_png(png_data
):
5588 # Reference: https://www.w3.org/TR/PNG/
5589 header
= png_data
[8:]
5591 if png_data
[:8] != b
'\x89PNG\x0d\x0a\x1a\x0a' or header
[4:8] != b
'IHDR':
5592 raise IOError('Not a valid PNG file.')
5594 int_map
= {1: '>B', 2: '>H', 4: '>I'}
5595 unpack_integer
= lambda x
: compat_struct_unpack(int_map
[len(x
)], x
)[0]
5600 length
= unpack_integer(header
[:4])
5603 chunk_type
= header
[:4]
5606 chunk_data
= header
[:length
]
5607 header
= header
[length
:]
5609 header
= header
[4:] # Skip CRC
5617 ihdr
= chunks
[0]['data']
5619 width
= unpack_integer(ihdr
[:4])
5620 height
= unpack_integer(ihdr
[4:8])
5624 for chunk
in chunks
:
5625 if chunk
['type'] == b
'IDAT':
5626 idat
+= chunk
['data']
5629 raise IOError('Unable to read PNG data.')
5631 decompressed_data
= bytearray(zlib
.decompress(idat
))
5636 def _get_pixel(idx
):
5641 for y
in range(height
):
5642 basePos
= y
* (1 + stride
)
5643 filter_type
= decompressed_data
[basePos
]
5647 pixels
.append(current_row
)
5649 for x
in range(stride
):
5650 color
= decompressed_data
[1 + basePos
+ x
]
5651 basex
= y
* stride
+ x
5656 left
= _get_pixel(basex
- 3)
5658 up
= _get_pixel(basex
- stride
)
5660 if filter_type
== 1: # Sub
5661 color
= (color
+ left
) & 0xff
5662 elif filter_type
== 2: # Up
5663 color
= (color
+ up
) & 0xff
5664 elif filter_type
== 3: # Average
5665 color
= (color
+ ((left
+ up
) >> 1)) & 0xff
5666 elif filter_type
== 4: # Paeth
5672 c
= _get_pixel(basex
- stride
- 3)
5680 if pa
<= pb
and pa
<= pc
:
5681 color
= (color
+ a
) & 0xff
5683 color
= (color
+ b
) & 0xff
5685 color
= (color
+ c
) & 0xff
5687 current_row
.append(color
)
5689 return width
, height
, pixels
5692 def write_xattr(path
, key
, value
):
5693 # This mess below finds the best xattr tool for the job
5695 # try the pyxattr module...
5698 if hasattr(xattr
, 'set'): # pyxattr
5699 # Unicode arguments are not supported in python-pyxattr until
5701 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5702 pyxattr_required_version
= '0.5.0'
5703 if version_tuple(xattr
.__version
__) < version_tuple(pyxattr_required_version
):
5704 # TODO: fallback to CLI tools
5705 raise XAttrUnavailableError(
5706 'python-pyxattr is detected but is too old. '
5707 'youtube-dlc requires %s or above while your version is %s. '
5708 'Falling back to other xattr implementations' % (
5709 pyxattr_required_version
, xattr
.__version
__))
5711 setxattr
= xattr
.set
5713 setxattr
= xattr
.setxattr
5716 setxattr(path
, key
, value
)
5717 except EnvironmentError as e
:
5718 raise XAttrMetadataError(e
.errno
, e
.strerror
)
5721 if compat_os_name
== 'nt':
5722 # Write xattrs to NTFS Alternate Data Streams:
5723 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5724 assert ':' not in key
5725 assert os
.path
.exists(path
)
5727 ads_fn
= path
+ ':' + key
5729 with open(ads_fn
, 'wb') as f
:
5731 except EnvironmentError as e
:
5732 raise XAttrMetadataError(e
.errno
, e
.strerror
)
5734 user_has_setfattr
= check_executable('setfattr', ['--version'])
5735 user_has_xattr
= check_executable('xattr', ['-h'])
5737 if user_has_setfattr
or user_has_xattr
:
5739 value
= value
.decode('utf-8')
5740 if user_has_setfattr
:
5741 executable
= 'setfattr'
5742 opts
= ['-n', key
, '-v', value
]
5743 elif user_has_xattr
:
5744 executable
= 'xattr'
5745 opts
= ['-w', key
, value
]
5747 cmd
= ([encodeFilename(executable
, True)]
5748 + [encodeArgument(o
) for o
in opts
]
5749 + [encodeFilename(path
, True)])
5752 p
= subprocess
.Popen(
5753 cmd
, stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
, stdin
=subprocess
.PIPE
)
5754 except EnvironmentError as e
:
5755 raise XAttrMetadataError(e
.errno
, e
.strerror
)
5756 stdout
, stderr
= process_communicate_or_kill(p
)
5757 stderr
= stderr
.decode('utf-8', 'replace')
5758 if p
.returncode
!= 0:
5759 raise XAttrMetadataError(p
.returncode
, stderr
)
5762 # On Unix, and can't find pyxattr, setfattr, or xattr.
5763 if sys
.platform
.startswith('linux'):
5764 raise XAttrUnavailableError(
5765 "Couldn't find a tool to set the xattrs. "
5766 "Install either the python 'pyxattr' or 'xattr' "
5767 "modules, or the GNU 'attr' package "
5768 "(which contains the 'setfattr' tool).")
5770 raise XAttrUnavailableError(
5771 "Couldn't find a tool to set the xattrs. "
5772 "Install either the python 'xattr' module, "
5773 "or the 'xattr' binary.")
def random_birthday(year_field, month_field, day_field):
    """Build a dict mapping the given form-field names to a random birthday.

    The date is drawn uniformly between 1950-01-01 and 1995-12-31 and the
    year/month/day components are returned as strings (as expected by the
    age-gate forms this helper is used to fill in).
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
5788 # Templates for internet shortcut files, which are plain text files.
5789 DOT_URL_LINK_TEMPLATE
= '''
5794 DOT_WEBLOC_LINK_TEMPLATE
= '''
5795 <?xml version="1.0" encoding="UTF-8"?>
5796 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5797 <plist version="1.0">
5800 \t<string>%(url)s</string>
5805 DOT_DESKTOP_LINK_TEMPLATE
= '''
def iri_to_uri(iri):
    """
    Convert an IRI (Internationalized Resource Identifier, which may contain
    Unicode characters) to an ASCII-only URI.

    No additional layer of escaping is added: existing percent-escapes such
    as `%3C` are left untouched (not turned into `%253C`); only characters
    outside the allowed sets are percent-encoded, using an underlying UTF-8
    encoding.
    """
    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Accessing `.netloc` with a single, unmatched bracket also raises a ValueError.

    # The `safe` values below list characters that must NOT be percent-encoded.
    # Everything else except letters, digits and '_.-' is percent-encoded with
    # an underlying UTF-8 encoding; pre-existing escapes are preserved.
    # Source for the `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # The 'idna' codec yields Punycode for Unicode hostnames; its output is ASCII.
    net_location += iri_parts.hostname.encode('idna').decode('utf-8')
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Params are a legacy way of carrying parameters; the `safe` set mirrors the path's.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # The spec does not explicitly single out the query component; this set follows it closely.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
def to_high_limit_path(path):
    """Return *path* in a form not subject to Windows' MAX_PATH limit.

    On win32/cygwin the absolute path is prefixed with the extended-length
    marker ``\\\\?\\`` (the length of individual path segments may still be
    limited).  On every other platform the path is returned unchanged —
    as visible, the function previously fell off the end and implicitly
    returned None for non-Windows platforms; the explicit ``return path``
    fixes that.
    """
    if sys.platform in ('win32', 'cygwin'):
        # r'\\?\ '.rstrip() is a trick to spell the \\?\ prefix without the
        # raw string ending in a backslash (which Python forbids).
        return r'\\?\ '.rstrip() + os.path.abspath(path)

    return path
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Look up ``obj[field]`` and render it through *template*.

    Values listed in *ignore* (before or after applying *func*) yield
    *default* instead; otherwise *func* (if given) transforms the value
    before it is interpolated into *template*.
    """
    val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    # Re-check after func: a callable may map a real value into an ignored one.
    if val in ignore:
        return default
    return template % val
def clean_podcast_url(url):
    """Strip well-known podcast tracking/analytics redirect prefixes from *url*."""
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
# Hexadecimal digits used when synthesising random hex strings.
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID string.

    Bug fix: the 'y' position carries the variant bits and must be one of
    8, 9, a or b (RFC 4122 section 4.1.1); the previous implementation drew
    it from the full hex range, producing invalid v4 UUIDs.
    """
    def _fill(match):
        if match.group(0) == 'x':
            return _HEX_TABLE[random.randint(0, 15)]
        return _HEX_TABLE[8 + random.randint(0, 3)]  # variant nibble: 8, 9, a or b
    return re.sub(r'[xy]', _fill, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
def make_dir(path, to_screen=None):
    """Ensure the parent directory of *path* exists.

    Returns True on success (including when nothing needed creating) and
    False when the directory could not be created.  If *to_screen* is a
    callable, it is invoked with a human-readable error message on failure.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Bug fix: the old guard `callable(to_screen) is not None` compared a
        # bool against None and was therefore always True, so a None
        # to_screen was called and raised TypeError.  Only call to_screen
        # when it really is callable.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
))