youtube_dlc/utils.py

   1 #!/usr/bin/env python
   2 # coding: utf-8
   3
   4 from __future__ import unicode_literals
   5
   6 import base64
   7 import binascii
   8 import calendar
   9 import codecs
  10 import collections
  11 import contextlib
  12 import ctypes
  13 import datetime
  14 import email.utils
  15 import email.header
  16 import errno
  17 import functools
  18 import gzip
  19 import imp
  20 import io
  21 import itertools
  22 import json
  23 import locale
  24 import math
  25 import operator
  26 import os
  27 import platform
  28 import random
  29 import re
  30 import socket
  31 import ssl
  32 import subprocess
  33 import sys
  34 import tempfile
  35 import time
  36 import traceback
  37 import xml.etree.ElementTree
  38 import zlib
  39
  40 from .compat import (
  41     compat_HTMLParseError,
  42     compat_HTMLParser,
  43     compat_basestring,
  44     compat_chr,
  45     compat_cookiejar,
  46     compat_ctypes_WINFUNCTYPE,
  47     compat_etree_fromstring,
  48     compat_expanduser,
  49     compat_html_entities,
  50     compat_html_entities_html5,
  51     compat_http_client,
  52     compat_integer_types,
  53     compat_numeric_types,
  54     compat_kwargs,
  55     compat_os_name,
  56     compat_parse_qs,
  57     compat_shlex_quote,
  58     compat_str,
  59     compat_struct_pack,
  60     compat_struct_unpack,
  61     compat_urllib_error,
  62     compat_urllib_parse,
  63     compat_urllib_parse_urlencode,
  64     compat_urllib_parse_urlparse,
  65     compat_urllib_parse_urlunparse,
  66     compat_urllib_parse_quote,
  67     compat_urllib_parse_quote_plus,
  68     compat_urllib_parse_unquote_plus,
  69     compat_urllib_request,
  70     compat_urlparse,
  71     compat_xpath,
  72 )
  73
  74 from .socks import (
  75     ProxyType,
  76     sockssocket,
  77 )
  78
  79
  80 def register_socks_protocols():
  81     # Append each SOCKS scheme missing from compat_urlparse.uses_netloc; schemes already listed are left alone
  82     # Why: in Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
  83     # (URLs whose protocol is not in urlparse.uses_netloc are not handled correctly)
  84     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
  85         if scheme not in compat_urlparse.uses_netloc:
  86             compat_urlparse.uses_netloc.append(scheme)
  87
  88
  89 # Type of a compiled regex object, taken from an actual instance because it is not clearly defined otherwise
  90 compiled_regex_type = type(re.compile(''))
  91
  92
  93 def random_user_agent():
  94     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
  95     _CHROME_VERSIONS = (
  96         '74.0.3729.129',
  97         '76.0.3780.3',
  98         '76.0.3780.2',
  99         '74.0.3729.128',
 100         '76.0.3780.1',
 101         '76.0.3780.0',
 102         '75.0.3770.15',
 103         '74.0.3729.127',
 104         '74.0.3729.126',
 105         '76.0.3779.1',
 106         '76.0.3779.0',
 107         '75.0.3770.14',
 108         '74.0.3729.125',
 109         '76.0.3778.1',
 110         '76.0.3778.0',
 111         '75.0.3770.13',
 112         '74.0.3729.124',
 113         '74.0.3729.123',
 114         '73.0.3683.121',
 115         '76.0.3777.1',
 116         '76.0.3777.0',
 117         '75.0.3770.12',
 118         '74.0.3729.122',
 119         '76.0.3776.4',
 120         '75.0.3770.11',
 121         '74.0.3729.121',
 122         '76.0.3776.3',
 123         '76.0.3776.2',
 124         '73.0.3683.120',
 125         '74.0.3729.120',
 126         '74.0.3729.119',
 127         '74.0.3729.118',
 128         '76.0.3776.1',
 129         '76.0.3776.0',
 130         '76.0.3775.5',
 131         '75.0.3770.10',
 132         '74.0.3729.117',
 133         '76.0.3775.4',
 134         '76.0.3775.3',
 135         '74.0.3729.116',
 136         '75.0.3770.9',
 137         '76.0.3775.2',
 138         '76.0.3775.1',
 139         '76.0.3775.0',
 140         '75.0.3770.8',
 141         '74.0.3729.115',
 142         '74.0.3729.114',
 143         '76.0.3774.1',
 144         '76.0.3774.0',
 145         '75.0.3770.7',
 146         '74.0.3729.113',
 147         '74.0.3729.112',
 148         '74.0.3729.111',
 149         '76.0.3773.1',
 150         '76.0.3773.0',
 151         '75.0.3770.6',
 152         '74.0.3729.110',
 153         '74.0.3729.109',
 154         '76.0.3772.1',
 155         '76.0.3772.0',
 156         '75.0.3770.5',
 157         '74.0.3729.108',
 158         '74.0.3729.107',
 159         '76.0.3771.1',
 160         '76.0.3771.0',
 161         '75.0.3770.4',
 162         '74.0.3729.106',
 163         '74.0.3729.105',
 164         '75.0.3770.3',
 165         '74.0.3729.104',
 166         '74.0.3729.103',
 167         '74.0.3729.102',
 168         '75.0.3770.2',
 169         '74.0.3729.101',
 170         '75.0.3770.1',
 171         '75.0.3770.0',
 172         '74.0.3729.100',
 173         '75.0.3769.5',
 174         '75.0.3769.4',
 175         '74.0.3729.99',
 176         '75.0.3769.3',
 177         '75.0.3769.2',
 178         '75.0.3768.6',
 179         '74.0.3729.98',
 180         '75.0.3769.1',
 181         '75.0.3769.0',
 182         '74.0.3729.97',
 183         '73.0.3683.119',
 184         '73.0.3683.118',
 185         '74.0.3729.96',
 186         '75.0.3768.5',
 187         '75.0.3768.4',
 188         '75.0.3768.3',
 189         '75.0.3768.2',
 190         '74.0.3729.95',
 191         '74.0.3729.94',
 192         '75.0.3768.1',
 193         '75.0.3768.0',
 194         '74.0.3729.93',
 195         '74.0.3729.92',
 196         '73.0.3683.117',
 197         '74.0.3729.91',
 198         '75.0.3766.3',
 199         '74.0.3729.90',
 200         '75.0.3767.2',
 201         '75.0.3767.1',
 202         '75.0.3767.0',
 203         '74.0.3729.89',
 204         '73.0.3683.116',
 205         '75.0.3766.2',
 206         '74.0.3729.88',
 207         '75.0.3766.1',
 208         '75.0.3766.0',
 209         '74.0.3729.87',
 210         '73.0.3683.115',
 211         '74.0.3729.86',
 212         '75.0.3765.1',
 213         '75.0.3765.0',
 214         '74.0.3729.85',
 215         '73.0.3683.114',
 216         '74.0.3729.84',
 217         '75.0.3764.1',
 218         '75.0.3764.0',
 219         '74.0.3729.83',
 220         '73.0.3683.113',
 221         '75.0.3763.2',
 222         '75.0.3761.4',
 223         '74.0.3729.82',
 224         '75.0.3763.1',
 225         '75.0.3763.0',
 226         '74.0.3729.81',
 227         '73.0.3683.112',
 228         '75.0.3762.1',
 229         '75.0.3762.0',
 230         '74.0.3729.80',
 231         '75.0.3761.3',
 232         '74.0.3729.79',
 233         '73.0.3683.111',
 234         '75.0.3761.2',
 235         '74.0.3729.78',
 236         '74.0.3729.77',
 237         '75.0.3761.1',
 238         '75.0.3761.0',
 239         '73.0.3683.110',
 240         '74.0.3729.76',
 241         '74.0.3729.75',
 242         '75.0.3760.0',
 243         '74.0.3729.74',
 244         '75.0.3759.8',
 245         '75.0.3759.7',
 246         '75.0.3759.6',
 247         '74.0.3729.73',
 248         '75.0.3759.5',
 249         '74.0.3729.72',
 250         '73.0.3683.109',
 251         '75.0.3759.4',
 252         '75.0.3759.3',
 253         '74.0.3729.71',
 254         '75.0.3759.2',
 255         '74.0.3729.70',
 256         '73.0.3683.108',
 257         '74.0.3729.69',
 258         '75.0.3759.1',
 259         '75.0.3759.0',
 260         '74.0.3729.68',
 261         '73.0.3683.107',
 262         '74.0.3729.67',
 263         '75.0.3758.1',
 264         '75.0.3758.0',
 265         '74.0.3729.66',
 266         '73.0.3683.106',
 267         '74.0.3729.65',
 268         '75.0.3757.1',
 269         '75.0.3757.0',
 270         '74.0.3729.64',
 271         '73.0.3683.105',
 272         '74.0.3729.63',
 273         '75.0.3756.1',
 274         '75.0.3756.0',
 275         '74.0.3729.62',
 276         '73.0.3683.104',
 277         '75.0.3755.3',
 278         '75.0.3755.2',
 279         '73.0.3683.103',
 280         '75.0.3755.1',
 281         '75.0.3755.0',
 282         '74.0.3729.61',
 283         '73.0.3683.102',
 284         '74.0.3729.60',
 285         '75.0.3754.2',
 286         '74.0.3729.59',
 287         '75.0.3753.4',
 288         '74.0.3729.58',
 289         '75.0.3754.1',
 290         '75.0.3754.0',
 291         '74.0.3729.57',
 292         '73.0.3683.101',
 293         '75.0.3753.3',
 294         '75.0.3752.2',
 295         '75.0.3753.2',
 296         '74.0.3729.56',
 297         '75.0.3753.1',
 298         '75.0.3753.0',
 299         '74.0.3729.55',
 300         '73.0.3683.100',
 301         '74.0.3729.54',
 302         '75.0.3752.1',
 303         '75.0.3752.0',
 304         '74.0.3729.53',
 305         '73.0.3683.99',
 306         '74.0.3729.52',
 307         '75.0.3751.1',
 308         '75.0.3751.0',
 309         '74.0.3729.51',
 310         '73.0.3683.98',
 311         '74.0.3729.50',
 312         '75.0.3750.0',
 313         '74.0.3729.49',
 314         '74.0.3729.48',
 315         '74.0.3729.47',
 316         '75.0.3749.3',
 317         '74.0.3729.46',
 318         '73.0.3683.97',
 319         '75.0.3749.2',
 320         '74.0.3729.45',
 321         '75.0.3749.1',
 322         '75.0.3749.0',
 323         '74.0.3729.44',
 324         '73.0.3683.96',
 325         '74.0.3729.43',
 326         '74.0.3729.42',
 327         '75.0.3748.1',
 328         '75.0.3748.0',
 329         '74.0.3729.41',
 330         '75.0.3747.1',
 331         '73.0.3683.95',
 332         '75.0.3746.4',
 333         '74.0.3729.40',
 334         '74.0.3729.39',
 335         '75.0.3747.0',
 336         '75.0.3746.3',
 337         '75.0.3746.2',
 338         '74.0.3729.38',
 339         '75.0.3746.1',
 340         '75.0.3746.0',
 341         '74.0.3729.37',
 342         '73.0.3683.94',
 343         '75.0.3745.5',
 344         '75.0.3745.4',
 345         '75.0.3745.3',
 346         '75.0.3745.2',
 347         '74.0.3729.36',
 348         '75.0.3745.1',
 349         '75.0.3745.0',
 350         '75.0.3744.2',
 351         '74.0.3729.35',
 352         '73.0.3683.93',
 353         '74.0.3729.34',
 354         '75.0.3744.1',
 355         '75.0.3744.0',
 356         '74.0.3729.33',
 357         '73.0.3683.92',
 358         '74.0.3729.32',
 359         '74.0.3729.31',
 360         '73.0.3683.91',
 361         '75.0.3741.2',
 362         '75.0.3740.5',
 363         '74.0.3729.30',
 364         '75.0.3741.1',
 365         '75.0.3741.0',
 366         '74.0.3729.29',
 367         '75.0.3740.4',
 368         '73.0.3683.90',
 369         '74.0.3729.28',
 370         '75.0.3740.3',
 371         '73.0.3683.89',
 372         '75.0.3740.2',
 373         '74.0.3729.27',
 374         '75.0.3740.1',
 375         '75.0.3740.0',
 376         '74.0.3729.26',
 377         '73.0.3683.88',
 378         '73.0.3683.87',
 379         '74.0.3729.25',
 380         '75.0.3739.1',
 381         '75.0.3739.0',
 382         '73.0.3683.86',
 383         '74.0.3729.24',
 384         '73.0.3683.85',
 385         '75.0.3738.4',
 386         '75.0.3738.3',
 387         '75.0.3738.2',
 388         '75.0.3738.1',
 389         '75.0.3738.0',
 390         '74.0.3729.23',
 391         '73.0.3683.84',
 392         '74.0.3729.22',
 393         '74.0.3729.21',
 394         '75.0.3737.1',
 395         '75.0.3737.0',
 396         '74.0.3729.20',
 397         '73.0.3683.83',
 398         '74.0.3729.19',
 399         '75.0.3736.1',
 400         '75.0.3736.0',
 401         '74.0.3729.18',
 402         '73.0.3683.82',
 403         '74.0.3729.17',
 404         '75.0.3735.1',
 405         '75.0.3735.0',
 406         '74.0.3729.16',
 407         '73.0.3683.81',
 408         '75.0.3734.1',
 409         '75.0.3734.0',
 410         '74.0.3729.15',
 411         '73.0.3683.80',
 412         '74.0.3729.14',
 413         '75.0.3733.1',
 414         '75.0.3733.0',
 415         '75.0.3732.1',
 416         '74.0.3729.13',
 417         '74.0.3729.12',
 418         '73.0.3683.79',
 419         '74.0.3729.11',
 420         '75.0.3732.0',
 421         '74.0.3729.10',
 422         '73.0.3683.78',
 423         '74.0.3729.9',
 424         '74.0.3729.8',
 425         '74.0.3729.7',
 426         '75.0.3731.3',
 427         '75.0.3731.2',
 428         '75.0.3731.0',
 429         '74.0.3729.6',
 430         '73.0.3683.77',
 431         '73.0.3683.76',
 432         '75.0.3730.5',
 433         '75.0.3730.4',
 434         '73.0.3683.75',
 435         '74.0.3729.5',
 436         '73.0.3683.74',
 437         '75.0.3730.3',
 438         '75.0.3730.2',
 439         '74.0.3729.4',
 440         '73.0.3683.73',
 441         '73.0.3683.72',
 442         '75.0.3730.1',
 443         '75.0.3730.0',
 444         '74.0.3729.3',
 445         '73.0.3683.71',
 446         '74.0.3729.2',
 447         '73.0.3683.70',
 448         '74.0.3729.1',
 449         '74.0.3729.0',
 450         '74.0.3726.4',
 451         '73.0.3683.69',
 452         '74.0.3726.3',
 453         '74.0.3728.0',
 454         '74.0.3726.2',
 455         '73.0.3683.68',
 456         '74.0.3726.1',
 457         '74.0.3726.0',
 458         '74.0.3725.4',
 459         '73.0.3683.67',
 460         '73.0.3683.66',
 461         '74.0.3725.3',
 462         '74.0.3725.2',
 463         '74.0.3725.1',
 464         '74.0.3724.8',
 465         '74.0.3725.0',
 466         '73.0.3683.65',
 467         '74.0.3724.7',
 468         '74.0.3724.6',
 469         '74.0.3724.5',
 470         '74.0.3724.4',
 471         '74.0.3724.3',
 472         '74.0.3724.2',
 473         '74.0.3724.1',
 474         '74.0.3724.0',
 475         '73.0.3683.64',
 476         '74.0.3723.1',
 477         '74.0.3723.0',
 478         '73.0.3683.63',
 479         '74.0.3722.1',
 480         '74.0.3722.0',
 481         '73.0.3683.62',
 482         '74.0.3718.9',
 483         '74.0.3702.3',
 484         '74.0.3721.3',
 485         '74.0.3721.2',
 486         '74.0.3721.1',
 487         '74.0.3721.0',
 488         '74.0.3720.6',
 489         '73.0.3683.61',
 490         '72.0.3626.122',
 491         '73.0.3683.60',
 492         '74.0.3720.5',
 493         '72.0.3626.121',
 494         '74.0.3718.8',
 495         '74.0.3720.4',
 496         '74.0.3720.3',
 497         '74.0.3718.7',
 498         '74.0.3720.2',
 499         '74.0.3720.1',
 500         '74.0.3720.0',
 501         '74.0.3718.6',
 502         '74.0.3719.5',
 503         '73.0.3683.59',
 504         '74.0.3718.5',
 505         '74.0.3718.4',
 506         '74.0.3719.4',
 507         '74.0.3719.3',
 508         '74.0.3719.2',
 509         '74.0.3719.1',
 510         '73.0.3683.58',
 511         '74.0.3719.0',
 512         '73.0.3683.57',
 513         '73.0.3683.56',
 514         '74.0.3718.3',
 515         '73.0.3683.55',
 516         '74.0.3718.2',
 517         '74.0.3718.1',
 518         '74.0.3718.0',
 519         '73.0.3683.54',
 520         '74.0.3717.2',
 521         '73.0.3683.53',
 522         '74.0.3717.1',
 523         '74.0.3717.0',
 524         '73.0.3683.52',
 525         '74.0.3716.1',
 526         '74.0.3716.0',
 527         '73.0.3683.51',
 528         '74.0.3715.1',
 529         '74.0.3715.0',
 530         '73.0.3683.50',
 531         '74.0.3711.2',
 532         '74.0.3714.2',
 533         '74.0.3713.3',
 534         '74.0.3714.1',
 535         '74.0.3714.0',
 536         '73.0.3683.49',
 537         '74.0.3713.1',
 538         '74.0.3713.0',
 539         '72.0.3626.120',
 540         '73.0.3683.48',
 541         '74.0.3712.2',
 542         '74.0.3712.1',
 543         '74.0.3712.0',
 544         '73.0.3683.47',
 545         '72.0.3626.119',
 546         '73.0.3683.46',
 547         '74.0.3710.2',
 548         '72.0.3626.118',
 549         '74.0.3711.1',
 550         '74.0.3711.0',
 551         '73.0.3683.45',
 552         '72.0.3626.117',
 553         '74.0.3710.1',
 554         '74.0.3710.0',
 555         '73.0.3683.44',
 556         '72.0.3626.116',
 557         '74.0.3709.1',
 558         '74.0.3709.0',
 559         '74.0.3704.9',
 560         '73.0.3683.43',
 561         '72.0.3626.115',
 562         '74.0.3704.8',
 563         '74.0.3704.7',
 564         '74.0.3708.0',
 565         '74.0.3706.7',
 566         '74.0.3704.6',
 567         '73.0.3683.42',
 568         '72.0.3626.114',
 569         '74.0.3706.6',
 570         '72.0.3626.113',
 571         '74.0.3704.5',
 572         '74.0.3706.5',
 573         '74.0.3706.4',
 574         '74.0.3706.3',
 575         '74.0.3706.2',
 576         '74.0.3706.1',
 577         '74.0.3706.0',
 578         '73.0.3683.41',
 579         '72.0.3626.112',
 580         '74.0.3705.1',
 581         '74.0.3705.0',
 582         '73.0.3683.40',
 583         '72.0.3626.111',
 584         '73.0.3683.39',
 585         '74.0.3704.4',
 586         '73.0.3683.38',
 587         '74.0.3704.3',
 588         '74.0.3704.2',
 589         '74.0.3704.1',
 590         '74.0.3704.0',
 591         '73.0.3683.37',
 592         '72.0.3626.110',
 593         '72.0.3626.109',
 594         '74.0.3703.3',
 595         '74.0.3703.2',
 596         '73.0.3683.36',
 597         '74.0.3703.1',
 598         '74.0.3703.0',
 599         '73.0.3683.35',
 600         '72.0.3626.108',
 601         '74.0.3702.2',
 602         '74.0.3699.3',
 603         '74.0.3702.1',
 604         '74.0.3702.0',
 605         '73.0.3683.34',
 606         '72.0.3626.107',
 607         '73.0.3683.33',
 608         '74.0.3701.1',
 609         '74.0.3701.0',
 610         '73.0.3683.32',
 611         '73.0.3683.31',
 612         '72.0.3626.105',
 613         '74.0.3700.1',
 614         '74.0.3700.0',
 615         '73.0.3683.29',
 616         '72.0.3626.103',
 617         '74.0.3699.2',
 618         '74.0.3699.1',
 619         '74.0.3699.0',
 620         '73.0.3683.28',
 621         '72.0.3626.102',
 622         '73.0.3683.27',
 623         '73.0.3683.26',
 624         '74.0.3698.0',
 625         '74.0.3696.2',
 626         '72.0.3626.101',
 627         '73.0.3683.25',
 628         '74.0.3696.1',
 629         '74.0.3696.0',
 630         '74.0.3694.8',
 631         '72.0.3626.100',
 632         '74.0.3694.7',
 633         '74.0.3694.6',
 634         '74.0.3694.5',
 635         '74.0.3694.4',
 636         '72.0.3626.99',
 637         '72.0.3626.98',
 638         '74.0.3694.3',
 639         '73.0.3683.24',
 640         '72.0.3626.97',
 641         '72.0.3626.96',
 642         '72.0.3626.95',
 643         '73.0.3683.23',
 644         '72.0.3626.94',
 645         '73.0.3683.22',
 646         '73.0.3683.21',
 647         '72.0.3626.93',
 648         '74.0.3694.2',
 649         '72.0.3626.92',
 650         '74.0.3694.1',
 651         '74.0.3694.0',
 652         '74.0.3693.6',
 653         '73.0.3683.20',
 654         '72.0.3626.91',
 655         '74.0.3693.5',
 656         '74.0.3693.4',
 657         '74.0.3693.3',
 658         '74.0.3693.2',
 659         '73.0.3683.19',
 660         '74.0.3693.1',
 661         '74.0.3693.0',
 662         '73.0.3683.18',
 663         '72.0.3626.90',
 664         '74.0.3692.1',
 665         '74.0.3692.0',
 666         '73.0.3683.17',
 667         '72.0.3626.89',
 668         '74.0.3687.3',
 669         '74.0.3691.1',
 670         '74.0.3691.0',
 671         '73.0.3683.16',
 672         '72.0.3626.88',
 673         '72.0.3626.87',
 674         '73.0.3683.15',
 675         '74.0.3690.1',
 676         '74.0.3690.0',
 677         '73.0.3683.14',
 678         '72.0.3626.86',
 679         '73.0.3683.13',
 680         '73.0.3683.12',
 681         '74.0.3689.1',
 682         '74.0.3689.0',
 683         '73.0.3683.11',
 684         '72.0.3626.85',
 685         '73.0.3683.10',
 686         '72.0.3626.84',
 687         '73.0.3683.9',
 688         '74.0.3688.1',
 689         '74.0.3688.0',
 690         '73.0.3683.8',
 691         '72.0.3626.83',
 692         '74.0.3687.2',
 693         '74.0.3687.1',
 694         '74.0.3687.0',
 695         '73.0.3683.7',
 696         '72.0.3626.82',
 697         '74.0.3686.4',
 698         '72.0.3626.81',
 699         '74.0.3686.3',
 700         '74.0.3686.2',
 701         '74.0.3686.1',
 702         '74.0.3686.0',
 703         '73.0.3683.6',
 704         '72.0.3626.80',
 705         '74.0.3685.1',
 706         '74.0.3685.0',
 707         '73.0.3683.5',
 708         '72.0.3626.79',
 709         '74.0.3684.1',
 710         '74.0.3684.0',
 711         '73.0.3683.4',
 712         '72.0.3626.78',
 713         '72.0.3626.77',
 714         '73.0.3683.3',
 715         '73.0.3683.2',
 716         '72.0.3626.76',
 717         '73.0.3683.1',
 718         '73.0.3683.0',
 719         '72.0.3626.75',
 720         '71.0.3578.141',
 721         '73.0.3682.1',
 722         '73.0.3682.0',
 723         '72.0.3626.74',
 724         '71.0.3578.140',
 725         '73.0.3681.4',
 726         '73.0.3681.3',
 727         '73.0.3681.2',
 728         '73.0.3681.1',
 729         '73.0.3681.0',
 730         '72.0.3626.73',
 731         '71.0.3578.139',
 732         '72.0.3626.72',
 733         '72.0.3626.71',
 734         '73.0.3680.1',
 735         '73.0.3680.0',
 736         '72.0.3626.70',
 737         '71.0.3578.138',
 738         '73.0.3678.2',
 739         '73.0.3679.1',
 740         '73.0.3679.0',
 741         '72.0.3626.69',
 742         '71.0.3578.137',
 743         '73.0.3678.1',
 744         '73.0.3678.0',
 745         '71.0.3578.136',
 746         '73.0.3677.1',
 747         '73.0.3677.0',
 748         '72.0.3626.68',
 749         '72.0.3626.67',
 750         '71.0.3578.135',
 751         '73.0.3676.1',
 752         '73.0.3676.0',
 753         '73.0.3674.2',
 754         '72.0.3626.66',
 755         '71.0.3578.134',
 756         '73.0.3674.1',
 757         '73.0.3674.0',
 758         '72.0.3626.65',
 759         '71.0.3578.133',
 760         '73.0.3673.2',
 761         '73.0.3673.1',
 762         '73.0.3673.0',
 763         '72.0.3626.64',
 764         '71.0.3578.132',
 765         '72.0.3626.63',
 766         '72.0.3626.62',
 767         '72.0.3626.61',
 768         '72.0.3626.60',
 769         '73.0.3672.1',
 770         '73.0.3672.0',
 771         '72.0.3626.59',
 772         '71.0.3578.131',
 773         '73.0.3671.3',
 774         '73.0.3671.2',
 775         '73.0.3671.1',
 776         '73.0.3671.0',
 777         '72.0.3626.58',
 778         '71.0.3578.130',
 779         '73.0.3670.1',
 780         '73.0.3670.0',
 781         '72.0.3626.57',
 782         '71.0.3578.129',
 783         '73.0.3669.1',
 784         '73.0.3669.0',
 785         '72.0.3626.56',
 786         '71.0.3578.128',
 787         '73.0.3668.2',
 788         '73.0.3668.1',
 789         '73.0.3668.0',
 790         '72.0.3626.55',
 791         '71.0.3578.127',
 792         '73.0.3667.2',
 793         '73.0.3667.1',
 794         '73.0.3667.0',
 795         '72.0.3626.54',
 796         '71.0.3578.126',
 797         '73.0.3666.1',
 798         '73.0.3666.0',
 799         '72.0.3626.53',
 800         '71.0.3578.125',
 801         '73.0.3665.4',
 802         '73.0.3665.3',
 803         '72.0.3626.52',
 804         '73.0.3665.2',
 805         '73.0.3664.4',
 806         '73.0.3665.1',
 807         '73.0.3665.0',
 808         '72.0.3626.51',
 809         '71.0.3578.124',
 810         '72.0.3626.50',
 811         '73.0.3664.3',
 812         '73.0.3664.2',
 813         '73.0.3664.1',
 814         '73.0.3664.0',
 815         '73.0.3663.2',
 816         '72.0.3626.49',
 817         '71.0.3578.123',
 818         '73.0.3663.1',
 819         '73.0.3663.0',
 820         '72.0.3626.48',
 821         '71.0.3578.122',
 822         '73.0.3662.1',
 823         '73.0.3662.0',
 824         '72.0.3626.47',
 825         '71.0.3578.121',
 826         '73.0.3661.1',
 827         '72.0.3626.46',
 828         '73.0.3661.0',
 829         '72.0.3626.45',
 830         '71.0.3578.120',
 831         '73.0.3660.2',
 832         '73.0.3660.1',
 833         '73.0.3660.0',
 834         '72.0.3626.44',
 835         '71.0.3578.119',
 836         '73.0.3659.1',
 837         '73.0.3659.0',
 838         '72.0.3626.43',
 839         '71.0.3578.118',
 840         '73.0.3658.1',
 841         '73.0.3658.0',
 842         '72.0.3626.42',
 843         '71.0.3578.117',
 844         '73.0.3657.1',
 845         '73.0.3657.0',
 846         '72.0.3626.41',
 847         '71.0.3578.116',
 848         '73.0.3656.1',
 849         '73.0.3656.0',
 850         '72.0.3626.40',
 851         '71.0.3578.115',
 852         '73.0.3655.1',
 853         '73.0.3655.0',
 854         '72.0.3626.39',
 855         '71.0.3578.114',
 856         '73.0.3654.1',
 857         '73.0.3654.0',
 858         '72.0.3626.38',
 859         '71.0.3578.113',
 860         '73.0.3653.1',
 861         '73.0.3653.0',
 862         '72.0.3626.37',
 863         '71.0.3578.112',
 864         '73.0.3652.1',
 865         '73.0.3652.0',
 866         '72.0.3626.36',
 867         '71.0.3578.111',
 868         '73.0.3651.1',
 869         '73.0.3651.0',
 870         '72.0.3626.35',
 871         '71.0.3578.110',
 872         '73.0.3650.1',
 873         '73.0.3650.0',
 874         '72.0.3626.34',
 875         '71.0.3578.109',
 876         '73.0.3649.1',
 877         '73.0.3649.0',
 878         '72.0.3626.33',
 879         '71.0.3578.108',
 880         '73.0.3648.2',
 881         '73.0.3648.1',
 882         '73.0.3648.0',
 883         '72.0.3626.32',
 884         '71.0.3578.107',
 885         '73.0.3647.2',
 886         '73.0.3647.1',
 887         '73.0.3647.0',
 888         '72.0.3626.31',
 889         '71.0.3578.106',
 890         '73.0.3635.3',
 891         '73.0.3646.2',
 892         '73.0.3646.1',
 893         '73.0.3646.0',
 894         '72.0.3626.30',
 895         '71.0.3578.105',
 896         '72.0.3626.29',
 897         '73.0.3645.2',
 898         '73.0.3645.1',
 899         '73.0.3645.0',
 900         '72.0.3626.28',
 901         '71.0.3578.104',
 902         '72.0.3626.27',
 903         '72.0.3626.26',
 904         '72.0.3626.25',
 905         '72.0.3626.24',
 906         '73.0.3644.0',
 907         '73.0.3643.2',
 908         '72.0.3626.23',
 909         '71.0.3578.103',
 910         '73.0.3643.1',
 911         '73.0.3643.0',
 912         '72.0.3626.22',
 913         '71.0.3578.102',
 914         '73.0.3642.1',
 915         '73.0.3642.0',
 916         '72.0.3626.21',
 917         '71.0.3578.101',
 918         '73.0.3641.1',
 919         '73.0.3641.0',
 920         '72.0.3626.20',
 921         '71.0.3578.100',
 922         '72.0.3626.19',
 923         '73.0.3640.1',
 924         '73.0.3640.0',
 925         '72.0.3626.18',
 926         '73.0.3639.1',
 927         '71.0.3578.99',
 928         '73.0.3639.0',
 929         '72.0.3626.17',
 930         '73.0.3638.2',
 931         '72.0.3626.16',
 932         '73.0.3638.1',
 933         '73.0.3638.0',
 934         '72.0.3626.15',
 935         '71.0.3578.98',
 936         '73.0.3635.2',
 937         '71.0.3578.97',
 938         '73.0.3637.1',
 939         '73.0.3637.0',
 940         '72.0.3626.14',
 941         '71.0.3578.96',
 942         '71.0.3578.95',
 943         '72.0.3626.13',
 944         '71.0.3578.94',
 945         '73.0.3636.2',
 946         '71.0.3578.93',
 947         '73.0.3636.1',
 948         '73.0.3636.0',
 949         '72.0.3626.12',
 950         '71.0.3578.92',
 951         '73.0.3635.1',
 952         '73.0.3635.0',
 953         '72.0.3626.11',
 954         '71.0.3578.91',
 955         '73.0.3634.2',
 956         '73.0.3634.1',
 957         '73.0.3634.0',
 958         '72.0.3626.10',
 959         '71.0.3578.90',
 960         '71.0.3578.89',
 961         '73.0.3633.2',
 962         '73.0.3633.1',
 963         '73.0.3633.0',
 964         '72.0.3610.4',
 965         '72.0.3626.9',
 966         '71.0.3578.88',
 967         '73.0.3632.5',
 968         '73.0.3632.4',
 969         '73.0.3632.3',
 970         '73.0.3632.2',
 971         '73.0.3632.1',
 972         '73.0.3632.0',
 973         '72.0.3626.8',
 974         '71.0.3578.87',
 975         '73.0.3631.2',
 976         '73.0.3631.1',
 977         '73.0.3631.0',
 978         '72.0.3626.7',
 979         '71.0.3578.86',
 980         '72.0.3626.6',
 981         '73.0.3630.1',
 982         '73.0.3630.0',
 983         '72.0.3626.5',
 984         '71.0.3578.85',
 985         '72.0.3626.4',
 986         '73.0.3628.3',
 987         '73.0.3628.2',
 988         '73.0.3629.1',
 989         '73.0.3629.0',
 990         '72.0.3626.3',
 991         '71.0.3578.84',
 992         '73.0.3628.1',
 993         '73.0.3628.0',
 994         '71.0.3578.83',
 995         '73.0.3627.1',
 996         '73.0.3627.0',
 997         '72.0.3626.2',
 998         '71.0.3578.82',
 999         '71.0.3578.81',
1000         '71.0.3578.80',
1001         '72.0.3626.1',
1002         '72.0.3626.0',
1003         '71.0.3578.79',
1004         '70.0.3538.124',
1005         '71.0.3578.78',
1006         '72.0.3623.4',
1007         '72.0.3625.2',
1008         '72.0.3625.1',
1009         '72.0.3625.0',
1010         '71.0.3578.77',
1011         '70.0.3538.123',
1012         '72.0.3624.4',
1013         '72.0.3624.3',
1014         '72.0.3624.2',
1015         '71.0.3578.76',
1016         '72.0.3624.1',
1017         '72.0.3624.0',
1018         '72.0.3623.3',
1019         '71.0.3578.75',
1020         '70.0.3538.122',
1021         '71.0.3578.74',
1022         '72.0.3623.2',
1023         '72.0.3610.3',
1024         '72.0.3623.1',
1025         '72.0.3623.0',
1026         '72.0.3622.3',
1027         '72.0.3622.2',
1028         '71.0.3578.73',
1029         '70.0.3538.121',
1030         '72.0.3622.1',
1031         '72.0.3622.0',
1032         '71.0.3578.72',
1033         '70.0.3538.120',
1034         '72.0.3621.1',
1035         '72.0.3621.0',
1036         '71.0.3578.71',
1037         '70.0.3538.119',
1038         '72.0.3620.1',
1039         '72.0.3620.0',
1040         '71.0.3578.70',
1041         '70.0.3538.118',
1042         '71.0.3578.69',
1043         '72.0.3619.1',
1044         '72.0.3619.0',
1045         '71.0.3578.68',
1046         '70.0.3538.117',
1047         '71.0.3578.67',
1048         '72.0.3618.1',
1049         '72.0.3618.0',
1050         '71.0.3578.66',
1051         '70.0.3538.116',
1052         '72.0.3617.1',
1053         '72.0.3617.0',
1054         '71.0.3578.65',
1055         '70.0.3538.115',
1056         '72.0.3602.3',
1057         '71.0.3578.64',
1058         '72.0.3616.1',
1059         '72.0.3616.0',
1060         '71.0.3578.63',
1061         '70.0.3538.114',
1062         '71.0.3578.62',
1063         '72.0.3615.1',
1064         '72.0.3615.0',
1065         '71.0.3578.61',
1066         '70.0.3538.113',
1067         '72.0.3614.1',
1068         '72.0.3614.0',
1069         '71.0.3578.60',
1070         '70.0.3538.112',
1071         '72.0.3613.1',
1072         '72.0.3613.0',
1073         '71.0.3578.59',
1074         '70.0.3538.111',
1075         '72.0.3612.2',
1076         '72.0.3612.1',
1077         '72.0.3612.0',
1078         '70.0.3538.110',
1079         '71.0.3578.58',
1080         '70.0.3538.109',
1081         '72.0.3611.2',
1082         '72.0.3611.1',
1083         '72.0.3611.0',
1084         '71.0.3578.57',
1085         '70.0.3538.108',
1086         '72.0.3610.2',
1087         '71.0.3578.56',
1088         '71.0.3578.55',
1089         '72.0.3610.1',
1090         '72.0.3610.0',
1091         '71.0.3578.54',
1092         '70.0.3538.107',
1093         '71.0.3578.53',
1094         '72.0.3609.3',
1095         '71.0.3578.52',
1096         '72.0.3609.2',
1097         '71.0.3578.51',
1098         '72.0.3608.5',
1099         '72.0.3609.1',
1100         '72.0.3609.0',
1101         '71.0.3578.50',
1102         '70.0.3538.106',
1103         '72.0.3608.4',
1104         '72.0.3608.3',
1105         '72.0.3608.2',
1106         '71.0.3578.49',
1107         '72.0.3608.1',
1108         '72.0.3608.0',
1109         '70.0.3538.105',
1110         '71.0.3578.48',
1111         '72.0.3607.1',
1112         '72.0.3607.0',
1113         '71.0.3578.47',
1114         '70.0.3538.104',
1115         '72.0.3606.2',
1116         '72.0.3606.1',
1117         '72.0.3606.0',
1118         '71.0.3578.46',
1119         '70.0.3538.103',
1120         '70.0.3538.102',
1121         '72.0.3605.3',
1122         '72.0.3605.2',
1123         '72.0.3605.1',
1124         '72.0.3605.0',
1125         '71.0.3578.45',
1126         '70.0.3538.101',
1127         '71.0.3578.44',
1128         '71.0.3578.43',
1129         '70.0.3538.100',
1130         '70.0.3538.99',
1131         '71.0.3578.42',
1132         '72.0.3604.1',
1133         '72.0.3604.0',
1134         '71.0.3578.41',
1135         '70.0.3538.98',
1136         '71.0.3578.40',
1137         '72.0.3603.2',
1138         '72.0.3603.1',
1139         '72.0.3603.0',
1140         '71.0.3578.39',
1141         '70.0.3538.97',
1142         '72.0.3602.2',
1143         '71.0.3578.38',
1144         '71.0.3578.37',
1145         '72.0.3602.1',
1146         '72.0.3602.0',
1147         '71.0.3578.36',
1148         '70.0.3538.96',
1149         '72.0.3601.1',
1150         '72.0.3601.0',
1151         '71.0.3578.35',
1152         '70.0.3538.95',
1153         '72.0.3600.1',
1154         '72.0.3600.0',
1155         '71.0.3578.34',
1156         '70.0.3538.94',
1157         '72.0.3599.3',
1158         '72.0.3599.2',
1159         '72.0.3599.1',
1160         '72.0.3599.0',
1161         '71.0.3578.33',
1162         '70.0.3538.93',
1163         '72.0.3598.1',
1164         '72.0.3598.0',
1165         '71.0.3578.32',
1166         '70.0.3538.87',
1167         '72.0.3597.1',
1168         '72.0.3597.0',
1169         '72.0.3596.2',
1170         '71.0.3578.31',
1171         '70.0.3538.86',
1172         '71.0.3578.30',
1173         '71.0.3578.29',
1174         '72.0.3596.1',
1175         '72.0.3596.0',
1176         '71.0.3578.28',
1177         '70.0.3538.85',
1178         '72.0.3595.2',
1179         '72.0.3591.3',
1180         '72.0.3595.1',
1181         '72.0.3595.0',
1182         '71.0.3578.27',
1183         '70.0.3538.84',
1184         '72.0.3594.1',
1185         '72.0.3594.0',
1186         '71.0.3578.26',
1187         '70.0.3538.83',
1188         '72.0.3593.2',
1189         '72.0.3593.1',
1190         '72.0.3593.0',
1191         '71.0.3578.25',
1192         '70.0.3538.82',
1193         '72.0.3589.3',
1194         '72.0.3592.2',
1195         '72.0.3592.1',
1196         '72.0.3592.0',
1197         '71.0.3578.24',
1198         '72.0.3589.2',
1199         '70.0.3538.81',
1200         '70.0.3538.80',
1201         '72.0.3591.2',
1202         '72.0.3591.1',
1203         '72.0.3591.0',
1204         '71.0.3578.23',
1205         '70.0.3538.79',
1206         '71.0.3578.22',
1207         '72.0.3590.1',
1208         '72.0.3590.0',
1209         '71.0.3578.21',
1210         '70.0.3538.78',
1211         '70.0.3538.77',
1212         '72.0.3589.1',
1213         '72.0.3589.0',
1214         '71.0.3578.20',
1215         '70.0.3538.76',
1216         '71.0.3578.19',
1217         '70.0.3538.75',
1218         '72.0.3588.1',
1219         '72.0.3588.0',
1220         '71.0.3578.18',
1221         '70.0.3538.74',
1222         '72.0.3586.2',
1223         '72.0.3587.0',
1224         '71.0.3578.17',
1225         '70.0.3538.73',
1226         '72.0.3586.1',
1227         '72.0.3586.0',
1228         '71.0.3578.16',
1229         '70.0.3538.72',
1230         '72.0.3585.1',
1231         '72.0.3585.0',
1232         '71.0.3578.15',
1233         '70.0.3538.71',
1234         '71.0.3578.14',
1235         '72.0.3584.1',
1236         '72.0.3584.0',
1237         '71.0.3578.13',
1238         '70.0.3538.70',
1239         '72.0.3583.2',
1240         '71.0.3578.12',
1241         '72.0.3583.1',
1242         '72.0.3583.0',
1243         '71.0.3578.11',
1244         '70.0.3538.69',
1245         '71.0.3578.10',
1246         '72.0.3582.0',
1247         '72.0.3581.4',
1248         '71.0.3578.9',
1249         '70.0.3538.67',
1250         '72.0.3581.3',
1251         '72.0.3581.2',
1252         '72.0.3581.1',
1253         '72.0.3581.0',
1254         '71.0.3578.8',
1255         '70.0.3538.66',
1256         '72.0.3580.1',
1257         '72.0.3580.0',
1258         '71.0.3578.7',
1259         '70.0.3538.65',
1260         '71.0.3578.6',
1261         '72.0.3579.1',
1262         '72.0.3579.0',
1263         '71.0.3578.5',
1264         '70.0.3538.64',
1265         '71.0.3578.4',
1266         '71.0.3578.3',
1267         '71.0.3578.2',
1268         '71.0.3578.1',
1269         '71.0.3578.0',
1270         '70.0.3538.63',
1271         '69.0.3497.128',
1272         '70.0.3538.62',
1273         '70.0.3538.61',
1274         '70.0.3538.60',
1275         '70.0.3538.59',
1276         '71.0.3577.1',
1277         '71.0.3577.0',
1278         '70.0.3538.58',
1279         '69.0.3497.127',
1280         '71.0.3576.2',
1281         '71.0.3576.1',
1282         '71.0.3576.0',
1283         '70.0.3538.57',
1284         '70.0.3538.56',
1285         '71.0.3575.2',
1286         '70.0.3538.55',
1287         '69.0.3497.126',
1288         '70.0.3538.54',
1289         '71.0.3575.1',
1290         '71.0.3575.0',
1291         '71.0.3574.1',
1292         '71.0.3574.0',
1293         '70.0.3538.53',
1294         '69.0.3497.125',
1295         '70.0.3538.52',
1296         '71.0.3573.1',
1297         '71.0.3573.0',
1298         '70.0.3538.51',
1299         '69.0.3497.124',
1300         '71.0.3572.1',
1301         '71.0.3572.0',
1302         '70.0.3538.50',
1303         '69.0.3497.123',
1304         '71.0.3571.2',
1305         '70.0.3538.49',
1306         '69.0.3497.122',
1307         '71.0.3571.1',
1308         '71.0.3571.0',
1309         '70.0.3538.48',
1310         '69.0.3497.121',
1311         '71.0.3570.1',
1312         '71.0.3570.0',
1313         '70.0.3538.47',
1314         '69.0.3497.120',
1315         '71.0.3568.2',
1316         '71.0.3569.1',
1317         '71.0.3569.0',
1318         '70.0.3538.46',
1319         '69.0.3497.119',
1320         '70.0.3538.45',
1321         '71.0.3568.1',
1322         '71.0.3568.0',
1323         '70.0.3538.44',
1324         '69.0.3497.118',
1325         '70.0.3538.43',
1326         '70.0.3538.42',
1327         '71.0.3567.1',
1328         '71.0.3567.0',
1329         '70.0.3538.41',
1330         '69.0.3497.117',
1331         '71.0.3566.1',
1332         '71.0.3566.0',
1333         '70.0.3538.40',
1334         '69.0.3497.116',
1335         '71.0.3565.1',
1336         '71.0.3565.0',
1337         '70.0.3538.39',
1338         '69.0.3497.115',
1339         '71.0.3564.1',
1340         '71.0.3564.0',
1341         '70.0.3538.38',
1342         '69.0.3497.114',
1343         '71.0.3563.0',
1344         '71.0.3562.2',
1345         '70.0.3538.37',
1346         '69.0.3497.113',
1347         '70.0.3538.36',
1348         '70.0.3538.35',
1349         '71.0.3562.1',
1350         '71.0.3562.0',
1351         '70.0.3538.34',
1352         '69.0.3497.112',
1353         '70.0.3538.33',
1354         '71.0.3561.1',
1355         '71.0.3561.0',
1356         '70.0.3538.32',
1357         '69.0.3497.111',
1358         '71.0.3559.6',
1359         '71.0.3560.1',
1360         '71.0.3560.0',
1361         '71.0.3559.5',
1362         '71.0.3559.4',
1363         '70.0.3538.31',
1364         '69.0.3497.110',
1365         '71.0.3559.3',
1366         '70.0.3538.30',
1367         '69.0.3497.109',
1368         '71.0.3559.2',
1369         '71.0.3559.1',
1370         '71.0.3559.0',
1371         '70.0.3538.29',
1372         '69.0.3497.108',
1373         '71.0.3558.2',
1374         '71.0.3558.1',
1375         '71.0.3558.0',
1376         '70.0.3538.28',
1377         '69.0.3497.107',
1378         '71.0.3557.2',
1379         '71.0.3557.1',
1380         '71.0.3557.0',
1381         '70.0.3538.27',
1382         '69.0.3497.106',
1383         '71.0.3554.4',
1384         '70.0.3538.26',
1385         '71.0.3556.1',
1386         '71.0.3556.0',
1387         '70.0.3538.25',
1388         '71.0.3554.3',
1389         '69.0.3497.105',
1390         '71.0.3554.2',
1391         '70.0.3538.24',
1392         '69.0.3497.104',
1393         '71.0.3555.2',
1394         '70.0.3538.23',
1395         '71.0.3555.1',
1396         '71.0.3555.0',
1397         '70.0.3538.22',
1398         '69.0.3497.103',
1399         '71.0.3554.1',
1400         '71.0.3554.0',
1401         '70.0.3538.21',
1402         '69.0.3497.102',
1403         '71.0.3553.3',
1404         '70.0.3538.20',
1405         '69.0.3497.101',
1406         '71.0.3553.2',
1407         '69.0.3497.100',
1408         '71.0.3553.1',
1409         '71.0.3553.0',
1410         '70.0.3538.19',
1411         '69.0.3497.99',
1412         '69.0.3497.98',
1413         '69.0.3497.97',
1414         '71.0.3552.6',
1415         '71.0.3552.5',
1416         '71.0.3552.4',
1417         '71.0.3552.3',
1418         '71.0.3552.2',
1419         '71.0.3552.1',
1420         '71.0.3552.0',
1421         '70.0.3538.18',
1422         '69.0.3497.96',
1423         '71.0.3551.3',
1424         '71.0.3551.2',
1425         '71.0.3551.1',
1426         '71.0.3551.0',
1427         '70.0.3538.17',
1428         '69.0.3497.95',
1429         '71.0.3550.3',
1430         '71.0.3550.2',
1431         '71.0.3550.1',
1432         '71.0.3550.0',
1433         '70.0.3538.16',
1434         '69.0.3497.94',
1435         '71.0.3549.1',
1436         '71.0.3549.0',
1437         '70.0.3538.15',
1438         '69.0.3497.93',
1439         '69.0.3497.92',
1440         '71.0.3548.1',
1441         '71.0.3548.0',
1442         '70.0.3538.14',
1443         '69.0.3497.91',
1444         '71.0.3547.1',
1445         '71.0.3547.0',
1446         '70.0.3538.13',
1447         '69.0.3497.90',
1448         '71.0.3546.2',
1449         '69.0.3497.89',
1450         '71.0.3546.1',
1451         '71.0.3546.0',
1452         '70.0.3538.12',
1453         '69.0.3497.88',
1454         '71.0.3545.4',
1455         '71.0.3545.3',
1456         '71.0.3545.2',
1457         '71.0.3545.1',
1458         '71.0.3545.0',
1459         '70.0.3538.11',
1460         '69.0.3497.87',
1461         '71.0.3544.5',
1462         '71.0.3544.4',
1463         '71.0.3544.3',
1464         '71.0.3544.2',
1465         '71.0.3544.1',
1466         '71.0.3544.0',
1467         '69.0.3497.86',
1468         '70.0.3538.10',
1469         '69.0.3497.85',
1470         '70.0.3538.9',
1471         '69.0.3497.84',
1472         '71.0.3543.4',
1473         '70.0.3538.8',
1474         '71.0.3543.3',
1475         '71.0.3543.2',
1476         '71.0.3543.1',
1477         '71.0.3543.0',
1478         '70.0.3538.7',
1479         '69.0.3497.83',
1480         '71.0.3542.2',
1481         '71.0.3542.1',
1482         '71.0.3542.0',
1483         '70.0.3538.6',
1484         '69.0.3497.82',
1485         '69.0.3497.81',
1486         '71.0.3541.1',
1487         '71.0.3541.0',
1488         '70.0.3538.5',
1489         '69.0.3497.80',
1490         '71.0.3540.1',
1491         '71.0.3540.0',
1492         '70.0.3538.4',
1493         '69.0.3497.79',
1494         '70.0.3538.3',
1495         '71.0.3539.1',
1496         '71.0.3539.0',
1497         '69.0.3497.78',
1498         '68.0.3440.134',
1499         '69.0.3497.77',
1500         '70.0.3538.2',
1501         '70.0.3538.1',
1502         '70.0.3538.0',
1503         '69.0.3497.76',
1504         '68.0.3440.133',
1505         '69.0.3497.75',
1506         '70.0.3537.2',
1507         '70.0.3537.1',
1508         '70.0.3537.0',
1509         '69.0.3497.74',
1510         '68.0.3440.132',
1511         '70.0.3536.0',
1512         '70.0.3535.5',
1513         '70.0.3535.4',
1514         '70.0.3535.3',
1515         '69.0.3497.73',
1516         '68.0.3440.131',
1517         '70.0.3532.8',
1518         '70.0.3532.7',
1519         '69.0.3497.72',
1520         '69.0.3497.71',
1521         '70.0.3535.2',
1522         '70.0.3535.1',
1523         '70.0.3535.0',
1524         '69.0.3497.70',
1525         '68.0.3440.130',
1526         '69.0.3497.69',
1527         '68.0.3440.129',
1528         '70.0.3534.4',
1529         '70.0.3534.3',
1530         '70.0.3534.2',
1531         '70.0.3534.1',
1532         '70.0.3534.0',
1533         '69.0.3497.68',
1534         '68.0.3440.128',
1535         '70.0.3533.2',
1536         '70.0.3533.1',
1537         '70.0.3533.0',
1538         '69.0.3497.67',
1539         '68.0.3440.127',
1540         '70.0.3532.6',
1541         '70.0.3532.5',
1542         '70.0.3532.4',
1543         '69.0.3497.66',
1544         '68.0.3440.126',
1545         '70.0.3532.3',
1546         '70.0.3532.2',
1547         '70.0.3532.1',
1548         '69.0.3497.60',
1549         '69.0.3497.65',
1550         '69.0.3497.64',
1551         '70.0.3532.0',
1552         '70.0.3531.0',
1553         '70.0.3530.4',
1554         '70.0.3530.3',
1555         '70.0.3530.2',
1556         '69.0.3497.58',
1557         '68.0.3440.125',
1558         '69.0.3497.57',
1559         '69.0.3497.56',
1560         '69.0.3497.55',
1561         '69.0.3497.54',
1562         '70.0.3530.1',
1563         '70.0.3530.0',
1564         '69.0.3497.53',
1565         '68.0.3440.124',
1566         '69.0.3497.52',
1567         '70.0.3529.3',
1568         '70.0.3529.2',
1569         '70.0.3529.1',
1570         '70.0.3529.0',
1571         '69.0.3497.51',
1572         '70.0.3528.4',
1573         '68.0.3440.123',
1574         '70.0.3528.3',
1575         '70.0.3528.2',
1576         '70.0.3528.1',
1577         '70.0.3528.0',
1578         '69.0.3497.50',
1579         '68.0.3440.122',
1580         '70.0.3527.1',
1581         '70.0.3527.0',
1582         '69.0.3497.49',
1583         '68.0.3440.121',
1584         '70.0.3526.1',
1585         '70.0.3526.0',
1586         '68.0.3440.120',
1587         '69.0.3497.48',
1588         '69.0.3497.47',
1589         '68.0.3440.119',
1590         '68.0.3440.118',
1591         '70.0.3525.5',
1592         '70.0.3525.4',
1593         '70.0.3525.3',
1594         '68.0.3440.117',
1595         '69.0.3497.46',
1596         '70.0.3525.2',
1597         '70.0.3525.1',
1598         '70.0.3525.0',
1599         '69.0.3497.45',
1600         '68.0.3440.116',
1601         '70.0.3524.4',
1602         '70.0.3524.3',
1603         '69.0.3497.44',
1604         '70.0.3524.2',
1605         '70.0.3524.1',
1606         '70.0.3524.0',
1607         '70.0.3523.2',
1608         '69.0.3497.43',
1609         '68.0.3440.115',
1610         '70.0.3505.9',
1611         '69.0.3497.42',
1612         '70.0.3505.8',
1613         '70.0.3523.1',
1614         '70.0.3523.0',
1615         '69.0.3497.41',
1616         '68.0.3440.114',
1617         '70.0.3505.7',
1618         '69.0.3497.40',
1619         '70.0.3522.1',
1620         '70.0.3522.0',
1621         '70.0.3521.2',
1622         '69.0.3497.39',
1623         '68.0.3440.113',
1624         '70.0.3505.6',
1625         '70.0.3521.1',
1626         '70.0.3521.0',
1627         '69.0.3497.38',
1628         '68.0.3440.112',
1629         '70.0.3520.1',
1630         '70.0.3520.0',
1631         '69.0.3497.37',
1632         '68.0.3440.111',
1633         '70.0.3519.3',
1634         '70.0.3519.2',
1635         '70.0.3519.1',
1636         '70.0.3519.0',
1637         '69.0.3497.36',
1638         '68.0.3440.110',
1639         '70.0.3518.1',
1640         '70.0.3518.0',
1641         '69.0.3497.35',
1642         '69.0.3497.34',
1643         '68.0.3440.109',
1644         '70.0.3517.1',
1645         '70.0.3517.0',
1646         '69.0.3497.33',
1647         '68.0.3440.108',
1648         '69.0.3497.32',
1649         '70.0.3516.3',
1650         '70.0.3516.2',
1651         '70.0.3516.1',
1652         '70.0.3516.0',
1653         '69.0.3497.31',
1654         '68.0.3440.107',
1655         '70.0.3515.4',
1656         '68.0.3440.106',
1657         '70.0.3515.3',
1658         '70.0.3515.2',
1659         '70.0.3515.1',
1660         '70.0.3515.0',
1661         '69.0.3497.30',
1662         '68.0.3440.105',
1663         '68.0.3440.104',
1664         '70.0.3514.2',
1665         '70.0.3514.1',
1666         '70.0.3514.0',
1667         '69.0.3497.29',
1668         '68.0.3440.103',
1669         '70.0.3513.1',
1670         '70.0.3513.0',
1671         '69.0.3497.28',
1672     )
1673     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1674
1675
# HTTP header defaults. The User-Agent value is computed once, when this
# module is executed, by the random_user_agent() call below; it does not
# change afterwards unless the dict is modified.
# NOTE(review): presumably attached to outgoing requests elsewhere - confirm.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1683
1684
# Fixed (non-random) User-Agent strings, keyed by browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1688
1689
# Unique sentinel meaning "the caller supplied no default". It is compared by
# identity (`default is not NO_DEFAULT`), so None stays usable as a real default.
NO_DEFAULT = object()
1691
# Full English month names in calendar order (index 0 is January).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Full month names per language code, each list in calendar order.
# The 'en' entry is the very same list object as ENGLISH_MONTH_NAMES.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1702
# File extensions (without the leading dot) of media containers, audio
# formats and streaming manifests, grouped one family per line.
# NOTE(review): presumably used to decide whether a URL/file suffix is a
# real media extension - confirm against the callers.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1717
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement. The two arguments to
# zip() are aligned position by position: plain strings contribute one
# replacement character each, while multi-letter replacements ('AE', 'OE',
# 'TH', 'ss', ...) are wrapped in one-element lists so that chain() yields
# them as a single item. Keep both sequences in lockstep when editing.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1722
# strftime/strptime-style format strings for dates whose day/month order is
# unambiguous (month given by name, or year-first numeric forms).
# NOTE(review): presumably tried in this order by the date parsing helpers
# elsewhere in this file - confirm before reordering entries.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)
1761
# DATE_FORMATS plus the ambiguous numeric forms, read with the day first
# (e.g. 31/12/2020). Built as a list copy so DATE_FORMATS itself is untouched.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# DATE_FORMATS plus the ambiguous numeric forms, read with the month first
# (e.g. 12/31/2020). Also an independent list copy.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
1780
# Matches the argument list `}('<payload>',<int>,<int>,'<words>'.split('|')`
# and captures the four values in that order.
# NOTE(review): this looks like the tail of "packed" (p,a,c,k,e,d) JavaScript - confirm.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element (case-insensitive,
# dot matches newlines) and captures its body in the named group `json_ld`.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1783
1784
def preferredencoding():
    """Return the name of a usable text encoding for this system.

    The locale's preferred encoding is returned when it can actually be
    used to encode text; if looking it up or using it fails for any
    reason, 'UTF-8' is returned instead.
    """
    fallback = 'UTF-8'
    try:
        candidate = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable encoding name raises here.
        'TEST'.encode(candidate)
    except Exception:
        return fallback
    return candidate
1798
1799
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created in the same
    directory as fn (named "<basename of fn>.<random>.tmp") and that file is
    then renamed onto fn, so readers never observe a half-written file.
    If anything fails, the temporary file is removed (best effort) and the
    original exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        # The file must survive close() so that it can be renamed afterwards
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        # Closing the file (end of the with block) flushes it before the rename
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # os.umask() can only be read by setting it, so set it to 0 and
            # restore it immediately; then give the file the permissions a
            # regularly created file would get (0o666 filtered by the umask).
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file, then propagate the error
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1858
1859
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        Returns the first element matching xpath that carries attribute key
        (with value val, when val is given), or None when there is none.
        key must consist of ASCII letters, '_' and '-' only.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        has_key = xpath + '[@%s]' % key
        if val is None:
            return node.find(has_key)
        expected = '%s' % (val,)
        if "'" not in expected:
            return node.find(xpath + "[@%s='%s']" % (key, expected))
        # The predicate syntax has no way to escape a single quote inside a
        # single-quoted value (ElementTree raises SyntaxError), so select by
        # attribute presence and compare the values in Python instead.
        for f in node.findall(has_key):
            if f.attrib.get(key) == expected:
                return f
        return None
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # Python 2.6: no attribute predicates in ElementTree, filter by hand
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None
1874
1875 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1876 # the namespace parameter
1877
1878
def xpath_with_ns(path, ns_map):
    """Expand "prefix:tag" steps of path into "{namespace-uri}tag" form.

    path is split on '/'; every step of the form prefix:tag is rewritten
    using ns_map[prefix], steps without a colon are kept as they are.
    Raises KeyError for a prefix missing from ns_map and ValueError for a
    step containing more than one colon.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
1889
1890
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node that matches xpath.

    xpath is either a single xpath string or an iterable of xpath strings
    that are tried in order until one matches.

    When nothing matches: default is returned if one was given; otherwise
    ExtractorError is raised if fatal is true (name, or xpath when name is
    None, is used in the message); otherwise None is returned.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty iterable of xpaths is treated as
        # "not found" instead of leaving n unbound.
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Format through a 1-tuple: xpath may itself be a tuple, which
            # the % operator would otherwise unpack as its argument list.
            raise ExtractorError('Could not find XML element %s' % (name,))
        else:
            return None
    return n
1912
1913
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    The lookup itself (including its handling of name, fatal and default)
    is delegated to xpath_element(). If the element exists but has no text:
    default is returned if one was given; otherwise ExtractorError is raised
    if fatal is true; otherwise None is returned.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Nothing found: n is already None or the caller's default, pass it on
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Format through a 1-tuple: xpath may itself be a tuple, which
            # the % operator would otherwise unpack as its argument list.
            raise ExtractorError('Could not find XML element\'s text %s' % (name,))
        else:
            return None
    return n.text
1927
1928
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: default is returned if one was given;
    otherwise ExtractorError is raised if fatal is true (name, or
    "xpath[@key]" when name is None, is used in the message); otherwise
    None is returned.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    if name is None:
        name = '%s[@%s]' % (xpath, key)
    raise ExtractorError('Could not find XML attribute %s' % name)
1940
1941
def get_element_by_id(id, html):
    """Return the content of the first tag in html whose id attribute equals id, or None"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
1945
1946
def get_element_by_class(class_name, html):
    """Return the content of the first tag in html carrying class class_name, or None"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1951
1952
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the first result of get_elements_by_attribute(), or None if there is none"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1956
1957
def get_elements_by_class(class_name, html):
    """Return a list with the content of every tag in html carrying class class_name"""
    # The class attribute may list several names: accept class_name as a
    # whole word anywhere inside the (quote-free) attribute value.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
1963
1964
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return a list with the content of every tag in html whose attribute matches value

    value is matched literally unless escape_value is false, in which case
    it is inserted into the pattern as a regular expression. The content of
    each matching tag is HTML-unescaped before it is returned.
    """
    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    def _content(match):
        inner = match.group('content')
        # Content that starts with a quote loses its first and last character
        if inner.startswith(('"', "'")):
            inner = inner[1:-1]
        return unescapeHTML(inner)

    return [_content(m) for m in re.finditer(element_re, html)]
1988
1989
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element

    After input has been fed, `attrs` holds the attributes of the most
    recently seen start tag as a dict; it stays empty if no start tag was
    parsed.
    """

    def __init__(self):
        # Defined up front so that `attrs` exists even if no tag is ever parsed
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Every start tag replaces the previously stored attributes
        self.attrs = dict(attrs)
1999
2000
def extract_attributes(html_element):
    """Parse the string of a single HTML element and return its attributes.

    Given an element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    the decoded attributes are returned as a dictionary:
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was gathered before the error is still returned.
        pass
    return attr_parser.attrs
2025
2026
def clean_html(html):
    """Turn an HTML snippet into readable plain text (None is passed through)"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Source newlines carry no meaning in HTML; real line breaks come from
    # <br> tags and paragraph boundaries, which are handled below.
    text = html.replace('\n', ' ')
    substitutions = (
        # <br> (with surrounding whitespace) becomes a newline
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        # "</p><p ...>" becomes a newline
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # every remaining tag is dropped
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Finally decode HTML entities and trim the result
    return unescapeHTML(text).strip()
2042
2043
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    The special name '-' selects standard output (its binary buffer when
    available; on Windows stdout is switched to binary mode first).
    Otherwise the file is opened with open_mode. If that fails for a reason
    other than a permission error, the name is passed through
    sanitize_path() and, when this changes it, the open is retried with the
    new name; errors of that second attempt propagate to the caller.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # A permission problem cannot be fixed by renaming
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
2074
2075
def timeconvert(timestr):
    """Convert an RFC 2822 date string into a POSIX timestamp (None if unparsable)"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2083
2084
def sanitize_filename(s, restricted=False, is_id=False):
    """Make a string safe to use as part of a filename.

    With restricted set, a stricter subset of characters is allowed:
    accented characters are transliterated via ACCENT_CHARS, and spaces,
    shell-special characters and remaining non-ASCII characters become '_'.
    Set is_id if s is not an arbitrary string but an ID that should be kept
    as intact as possible: the final clean-up (collapsing and trimming
    underscores, fixing a leading '-' or '.') is then skipped.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # Question marks and control characters are dropped entirely
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    # A leading dash could be mistaken for a command line option
    if result.startswith('-'):
        result = '_' + result[1:]
    result = result.lstrip('.')
    return result or '_'
2124
2125
def sanitize_path(s):
    """Sanitize and normalize a path on Windows; other platforms get s back unchanged.

    Every path component except '.' and '..' has the characters that are
    not allowed in Windows file names, as well as a trailing dot or
    whitespace character, replaced with '#'. A drive or UNC prefix is kept
    untouched.
    """
    if sys.platform != 'win32':
        return s

    drive_or_unc = os.path.splitdrive(s)[0]
    if not drive_or_unc and sys.version_info < (2, 7):
        # Python 2.6: splitdrive() does not recognise UNC prefixes
        drive_or_unc = os.path.splitunc(s)[0]

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the component in front of the first separator after the drive
        parts = parts[1:]

    def sanitize_part(path_part):
        if path_part in ['.', '..']:
            return path_part
        return re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)

    sanitized_parts = [sanitize_part(path_part) for path_part in parts]
    if drive_or_unc:
        sanitized_parts.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_parts)
2142
2143
def sanitize_url(url):
    """Repair a few common URL defects and return the resulting URL.

    Protocol-relative URLs ('//host/...') get the 'http:' scheme, and known
    scheme typos are corrected; anything else is returned unchanged.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        # Patterns are anchored, so at most one substitution can happen
        fixed, hits = re.subn(mistake, fixup, url)
        if hits:
            return fixed
    return url
2160
2161
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after passing the URL through sanitize_url()"""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2164
2165
def expand_path(s):
    """Expand ~ (via compat_expanduser) and then shell variables"""
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2169
2170
def orderedSet(iterable):
    """ Return a list of the iterable's items without duplicates, keeping first-seen order """
    unique = []
    for item in iterable:
        # A list (not a set) is used for lookups, so unhashable items work too
        if item in unique:
            continue
        unique.append(item)
    return unique
2178
2179
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    @param entity_with_semicolon  The entity without the leading '&' but
                                  including the trailing ';', e.g. 'amp;'
    @return  The decoded text, or the literal '&...;' form when the entity
             is unknown or names a code point that cannot be represented
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # ValueError: the number is outside the Unicode range.
            # OverflowError: the number is too large to be converted at all
            # (e.g. '&#99999999999;'); treat it like any other unknown entity
            # instead of crashing the whole unescape operation.
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2209
2210
def unescapeHTML(s):
    """Replace every '&...;' entity in s using _htmlentity_transform().

    None is passed through; any other input must be exactly a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(mobj):
        # group(1) is the entity without '&' but with the trailing ';'
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', decode_entity, s)
2218
2219
def process_communicate_or_kill(p, *args, **kwargs):
    """Run p.communicate(*args, **kwargs) and return its result.

    If communicate() is interrupted by any exception, the process is killed
    and waited for before the exception is re-raised.
    """
    try:
        result = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return result
2227
2228
def get_subprocess_encoding():
    """Return the encoding name used for subprocess arguments ('utf-8' if none is reported)"""
    on_modern_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2239
2240
def encodeFilename(s, for_subprocess=False):
    """
    Return s in the form expected by file/subprocess APIs.

    @param s               The name of the file
    @param for_subprocess  The result is meant for a subprocess call, so the
                           Windows Unicode-API shortcut is not taken
    @return                s itself where Unicode is accepted, otherwise s
                           encoded with get_subprocess_encoding()
                           (unencodable characters are dropped)
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    uses_windows_unicode_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if uses_windows_unicode_api or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2263
2264
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string filename with get_subprocess_encoding().

    On Python 3, and for anything that is not a byte string, the value is
    returned unchanged. for_subprocess is accepted but not used here.
    """
    needs_decoding = sys.version_info < (3, 0) and isinstance(b, bytes)
    if not needs_decoding:
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2274
2275
def encodeArgument(s):
    """Encode a command-line argument via encodeFilename() in subprocess mode"""
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        text = s.decode('ascii')
    return encodeFilename(text, True)
2283
2284
def decodeArgument(b):
    """Decode a command-line argument via decodeFilename() in subprocess mode"""
    decoded = decodeFilename(b, True)
    return decoded
2287
2288
def decodeOption(optval):
    """Return an option value as text.

    None is passed through; byte strings are decoded with
    preferredencoding(). The result must be a compat_str.
    """
    if optval is None:
        return None

    text = optval
    if isinstance(text, bytes):
        text = text.decode(preferredencoding())

    assert isinstance(text, compat_str)
    return text
2297
2298
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as [H:]MM:SS, M:SS or plain seconds.

    @param secs   Duration in seconds
    @param delim  Separator placed between the hour/minute/second fields
    @return       'H<delim>MM<delim>SS' from one hour up, 'M<delim>SS' from
                  one minute up, otherwise just the number of seconds
    """
    # The boundaries are inclusive: exactly one hour must render as 1:00:00
    # (not 60:00) and exactly one minute as 1:00 (not 60)
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2306
2307
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler with a suitable SSL configuration.

    @param params  Options dict; 'nocheckcertificate' turns certificate
                   verification off
    @param kwargs  Passed through to YoutubeDLHTTPSHandler
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4: build the context by hand
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2331
2332
def bug_reports_message():
    """Return the text appended to error messages asking the user to report the issue"""
    update_cmd = (
        'type  youtube-dlc -U  to update'
        if ytdl_is_updateable()
        else 'see  https://github.com/pukkandan/yt-dlp  on how to update')
    return ''.join((
        '; please report this issue on https://github.com/pukkandan/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.',
    ))
2342
2343
class YoutubeDLError(Exception):
    """Common base class of the YoutubeDL exceptions defined in this module."""
2347
2348
class ExtractorError(YoutubeDLError):
    """Raised when information extraction fails."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
        video_id, if given, is prepended to the message; cause, if given, is appended to it.
        """
        # Errors raised while one of these is being handled are never bugs
        expected_types = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_types:
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as text, or None if there is none"""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
2376
2377
class UnsupportedError(ExtractorError):
    """Expected extractor error for a URL that is not supported; keeps the URL in .url"""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2383
2384
class RegexNotFoundError(ExtractorError):
    """Extractor error used when a regular expression did not match"""
2388
2389
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    May be raised when a website does not make a video available from the
    user's geographic location. Always constructed as an expected error.
    """

    def __init__(self, msg, countries=None):
        # countries is stored as given and not interpreted here
        self.msg = msg
        self.countries = countries
        super(GeoRestrictedError, self).__init__(msg, expected=True)
2401
2402
class DownloadError(YoutubeDLError):
    """Download Error exception.

    FileDownloader objects may raise this, carrying the relevant error
    message, when they are not configured to continue on errors.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
2415
2416
class SameFileError(YoutubeDLError):
    """Same File exception.

    Raised by FileDownloader objects when several downloads would end up
    being written to the same file on disk.
    """
2424
2425
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this to signal that the
    postprocessing task failed. The message is also available as .msg.
    """

    def __init__(self, msg):
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
2436
2437
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached.

    NOTE(review): the previous docstring was a copy of MaxDownloadsReached's
    ("--max-downloads limit has been reached"), which does not describe this
    class. Presumably this is raised for the --break-on-existing condition;
    confirm against the raise sites.
    """
    pass
2441
2442
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached.

    NOTE(review): the previous docstring was a copy of MaxDownloadsReached's
    ("--max-downloads limit has been reached"), which does not describe this
    class. Presumably this is raised for the --break-on-reject condition;
    confirm against the raise sites.
    """
    pass
2446
2447
class MaxDownloadsReached(YoutubeDLError):
    """ Signals that the --max-downloads limit has been reached. """
2451
2452
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Raised when the format requested for a video is not available for
    that video.
    """
2460
2461
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    FileDownloader objects may raise this when a downloaded file is smaller
    than the size the server announced first, which indicates that the
    connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2477
2478
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an errno-style code, a message and a derived .reason.

    .reason is 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED', chosen from
    the code and from well-known fragments of the message.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        out_of_space = (
            self.code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in self.msg
            or 'Disk quota exceeded' in self.msg)
        if out_of_space:
            reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
2493
2494
class XAttrUnavailableError(YoutubeDLError):
    """YoutubeDL error type that adds no behaviour of its own.

    Presumably raised when extended attributes cannot be used at all;
    confirm against the raise sites.
    """
2497
2498
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and apply the handler's 'source_address' param.

    @param ydl_handler  Handler whose _params dict is consulted for
                        'source_address'
    @param http_class   Connection class, instantiated with *args/**kwargs
    @param is_https     Whether the Python 2.6 fallback connect() should
                        wrap the socket with TLS
    @return             The connection object; when a source address is
                        configured it binds to that address and only tries
                        remote addresses of the same IP family
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: the source_address parameter shadows the outer variable;
            # here it is the (host, port) tuple to bind to, and it is indexed
            # below, so it must not be left at its None default
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source host selects IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate in turn; the first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and move on to the next address
                    err = _
                    if sock is not None:
                        sock.close()
            # Every candidate failed (or there was none): report the last error
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 in the bind address
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: replace connect() on this instance
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2562
2563
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker header.

    Without the marker the headers object itself is returned. With it, a new
    dict is returned that contains neither the marker nor any
    'Accept-Encoding' header (matched case-insensitively).
    """
    marker = 'Youtubedl-no-compression'
    if marker not in headers:
        return headers

    kept = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del kept[marker]
    return kept
2572
2573
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Keep params on the handler; other arguments go to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, via a SOCKS connection class if it carries a
        'Ytdl-socks-proxy' header (the header is removed from the request)."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress data as a raw deflate stream, falling back to a
        zlib-wrapped stream if that fails."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Prepare an outgoing request: percent-encode its URL, add the
        missing std_headers and apply the internal marker headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Post-process a response: transparently decompress gzip/deflate
        bodies and percent-encode the Location header of redirects."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if none of the
                # attempts works, the for/else re-raises the first error
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic goes through the same request/response processing
    https_request = http_request
    https_response = http_response
2695
2696
def make_socks_conn_class(base_class, socks_proxy):
    """Build a connection class that connects base_class through a SOCKS proxy.

    @param base_class   HTTPConnection or HTTPSConnection (sub)class to extend
    @param socks_proxy  Proxy URL; the scheme selects the protocol
                        (socks5, socks4a, socks4 or plain socks for SOCKS4),
                        the port defaults to 1080 and username/password are
                        URL-unquoted when present
    @return             A subclass of base_class whose connect() goes
                        through the proxy
    @raise ValueError   If the URL scheme is not one of the supported ones
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Without this branch socks_type stayed unbound and the function
        # failed a few lines further down with an UnboundLocalError
        raise ValueError(
            'Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Keep None/'' as they are so missing credentials stay missing
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2738
2739
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections through _create_http_connection()
    and supports the internal 'Ytdl-socks-proxy' request header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        """Keep params and the connection class (HTTPSConnection when none
        is given); other arguments go to HTTPSHandler."""
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        if https_conn_class:
            self._https_conn_class = https_conn_class
        else:
            self._https_conn_class = compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open req, via a SOCKS connection class if it carries a
        'Ytdl-socks-proxy' header (the header is removed from the request)."""
        # Forward whichever SSL settings this Python version stored on the handler
        forwarded = (
            ('_context', 'context'),  # python > 2.6
            ('_check_hostname', 'check_hostname'),  # python 3.x
        )
        open_kwargs = dict(
            (key, getattr(self, attr))
            for attr, key in forwarded if hasattr(self, attr))

        conn_class = self._https_conn_class
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
2763
2764
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    Cookie jar that reads and writes cookie files as UTF-8 text.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dlc.  Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        filename defaults to self.filename; ValueError is raised when
        neither is set.
        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        def as_flag(condition):
            return 'TRUE' if condition else 'FALSE'

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                expires = '' if cookie.expires is None else compat_str(cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                fields = [
                    cookie.domain, as_flag(cookie.domain.startswith('.')), cookie.path,
                    as_flag(cookie.secure), expires, name, value]
                f.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Malformed entries are skipped with a warning on stderr. filename
        defaults to self.filename; ValueError is raised when neither is set.
        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker, then validate real entries
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            fields = line.split('\t')
            if len(fields) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(fields))
            entry = self._CookieFileEntry(*fields)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % entry.expires_at)
            return line

        # Collect the acceptable lines in memory and parse those
        buffered = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for raw_line in f:
                try:
                    prepared = prepare_line(raw_line)
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, raw_line), sys.stderr)
                    continue
                buffered.write(prepared)
        buffered.seek(0)
        self._really_load(buffered, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2881
2882
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor whose HTTPS hooks reuse the HTTP request/response hooks."""

    def __init__(self, cookiejar=None):
        base = compat_urllib_request.HTTPCookieProcessor
        base.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Hand the response to HTTPCookieProcessor unchanged."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is currently disabled.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        base = compat_urllib_request.HTTPCookieProcessor
        return base.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2905
2906
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that coerces the redirect target to text on Python 2.

    On Python 3 the class body is empty apart from this docstring, so the
    base class behaviour is used as is.
    """
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2914
2915
def extract_timezone(date_str):
    """Split a trailing UTC offset off a date string.

    Recognizes a final ``Z`` or a final ``+HH:MM`` / ``-HHMM`` style offset
    (optionally preceded by one space) that follows at least 8 characters.

    Returns a ``(timedelta, date_str)`` tuple: the offset (zero when none was
    found or when it was ``Z``) and the date string with the offset removed.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        # Nothing to strip; treat the value as UTC
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(tz_match.group('tz'))]
    if not tz_match.group('sign'):
        # A bare "Z" designates UTC
        return datetime.timedelta(), remainder

    direction = 1 if tz_match.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(tz_match.group('hours')),
        minutes=direction * int(tz_match.group('minutes')))
    return offset, remainder
2932
2933
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp from the given date.

    date_str  -- ``YYYY-MM-DD<delimiter>HH:MM:SS`` text; fractional seconds
                 are discarded. ``None`` yields ``None``.
    delimiter -- separator between the date and the time part.
    timezone  -- ``timedelta`` offset to subtract; when ``None`` it is
                 extracted from the end of ``date_str``.

    Returns ``None`` when the string does not match the expected layout.
    """
    if date_str is None:
        return None

    # Fractional seconds are not part of the strptime layout below
    cleaned = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, cleaned = extract_timezone(cleaned)

    layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        moment = datetime.datetime.strptime(cleaned, layout) - timezone
        return calendar.timegm(moment.timetuple())
    except ValueError:
        return None
2951
2952
def date_formats(day_first=True):
    """Return the day-first or the month-first date format collection."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2955
2956
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Commas become spaces, then AM/PM markers (with an optional following
    # timezone abbreviation) and a trailing numeric offset are dropped
    cleaned = date_str.replace(',', ' ')
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Every format is tried; the last one that parses determines the result
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to RFC 2822 style parsing
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if result is None:
        return None
    return compat_str(result)
2983
2984
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    date_str  -- the text to parse; None yields None.
    day_first -- selects which format collection date_formats() returns.

    Returns None (implicitly) when no format and no RFC 2822 style parse
    succeeds.
    """
    if date_str is None:
        return None

    # Commas and pipes are removed outright
    date_str = re.sub(r'[,|]', '', date_str)

    # 12 hours are added whenever "PM" occurs anywhere in the string.
    # NOTE(review): whether "12:xx PM" is handled correctly depends on the
    # formats returned by date_formats() - confirm.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    # The first format that parses wins
    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to RFC 2822 style parsing; the previously extracted
    # timezone offset is not applied on this path
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3016
3017
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL.

    Takes the text after the last dot of the part preceding the first '?'.
    Returns default_ext when url is None, contains no dot, or the guess is
    neither purely alphanumeric nor (after dropping trailing slashes) one
    of KNOWN_EXTENSIONS.
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3029
3030
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'.

    expected_real_ext is forwarded to replace_extension() unchanged.
    """
    return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
3033
3034
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD,
    'now', 'today', 'yesterday' or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months count as 30 days and years as 365 days.
    Raises ValueError when the string matches none of these forms.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount

    unit = relative.group('unit')
    # timedelta has no month/year units, so approximate them in days
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
3062
3063
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Strings that are not exactly eight digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
3072
3073
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; a missing bound defaults to the earliest/latest
        representable date.

        Raises ValueError when start lies after end.
        """
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range (both bounds inclusive)"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))
3103
3104
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3113
3114
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The special method is the Win32 WriteConsoleW call; it is only used when
    out is backed by file descriptor 1 or 2 and that handle is a console.
    Raises OSError when WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> GetStdHandle() identifier
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True when the handle is invalid, is not a character device, or
        # GetConsoleMode fails on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at a time, stopping before the next
        # non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own as 2 units
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3188
3189
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr by default) and flush it.

    On win32, when no explicit encoding is given and out has a fileno, the
    console writer is tried first. Otherwise the text is encoded (dropping
    unencodable characters) for binary streams and for streams exposing a
    ``buffer``, and written as is to any other stream.
    """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    try_console = (
        sys.platform == 'win32' and encoding is None
        and hasattr(stream, 'fileno'))
    if try_console and _windows_write_string(s, stream):
        return

    # Python 2 lies about mode of sys.stderr
    needs_bytes = 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3
    if needs_bytes:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        charset = encoding or getattr(stream, 'encoding', None) or preferredencoding()
        stream.buffer.write(s.encode(charset, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
3210
3211
def bytes_to_intlist(bs):
    """Return the items of a byte sequence as a list of integers.

    An empty (falsy) input yields an empty list.
    """
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    # Indexing yielded one-character strings: convert each of them
    return list(map(ord, bs))
3219
3220
def intlist_to_bytes(xs):
    """Pack a sequence of integers into a byte string, one unsigned byte each.

    An empty (falsy) input yields b''.
    """
    if xs:
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
3225
3226
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on win32, fcntl.flock elsewhere, and
# stubs raising IOError when fcntl cannot be imported.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Structure passed to LockFileEx/UnlockFileEx; Offset/OffsetHigh
        # give the start of the byte range to lock
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high DWORDs of the byte count to lock, used to cover the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Lock the file starting at offset 0; raises OSError on failure
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file can pass the same
        # structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock, 0x0 a shared one
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Release the lock taken by _lock_file; raises OSError on failure
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            # Exclusive or shared flock on the file
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        # Without fcntl both operations always fail with IOError
        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3300
3301
class locked_file(object):
    """Context manager that opens a file and holds a lock on it.

    The file is opened in __init__; the lock is taken in __enter__ (shared
    for mode 'r', exclusive for 'a' and 'w') and released in __exit__, which
    also closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        # Only plain text read/append/write modes are accepted
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Do not leak the open file when the lock cannot be taken.
            # NOTE(review): the win32 _lock_file raises OSError, which is a
            # distinct class from IOError on Python 2 - confirm whether the
            # file should be closed in that case too.
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Always close the file, even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3331
3332
def get_filesystem_encoding():
    """Return the file system encoding, or 'utf-8' when Python reports None."""
    detected = sys.getfilesystemencoding()
    if detected is None:
        return 'utf-8'
    return detected
3336
3337
def shell_quote(args):
    """Return the arguments shell-quoted and joined with single spaces.

    Byte string arguments are decoded with the file system encoding first.
    """
    fs_encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join([compat_shlex_quote(as_text(arg)) for arg in args])
3347
3348
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    url  -- the URL to decorate; data already smuggled into it is kept and
            takes precedence over entries in data with the same key.
    data -- dict of JSON-serializable values to attach. It is not modified.

    Returns the URL with the merged data appended as a
    '#__youtubedl_smuggle=...' fragment.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so that the caller's dict is left untouched
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
3357
3358
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (url, data).

    URLs without a '#__youtubedl_smuggle' marker are returned unchanged
    together with default.
    """
    if '#__youtubedl_smuggle' in smug_url:
        plain_url, _, fragment = smug_url.rpartition('#')
        encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return plain_url, json.loads(encoded)
    return smug_url, default
3366
3367
def format_bytes(bytes):
    """Format a byte count with a binary (1024-based) unit suffix.

    bytes -- number of bytes as a number or numeric string; None yields
             'N/A'.

    Returns a string such as '1.00KiB'. Values below one byte are reported
    in 'B' and values beyond the largest known unit in 'YiB'.
    Raises ValueError for negative values.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)

    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the index inside the suffix table: fractional values would
        # otherwise produce a negative index and huge ones an IndexError
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3380
3381
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' at the start of s and scale it by the unit.

    unit_table -- mapping of unit name to multiplier.
    s          -- the text to parse; a comma or dot is accepted as the
                  decimal separator.

    Returns the scaled value truncated to int, or None when s does not
    start with a number followed by a known unit.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    found = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if found is None:
        return None
    number = float(found.group('num').replace(',', '.'))
    multiplier = unit_table[found.group('unit')]
    return int(number * multiplier)
3391
3392
def parse_filesize(s):
    """Parse a human-readable file size such as '1.5 MiB' into bytes.

    Returns the size as an int, or None when s is None or cannot be matched
    by lookup_unit_table() against the unit table below.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    # Multipliers per spelling: the 'XiB' forms, the lower-case-prefix 'xB'
    # forms and the '...bibytes' names are 1024-based; the other spellings
    # are 1000-based
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3462
3463
def parse_count(s):
    """Parse a count such as '1,000', '1.5k' or '2M' into an int.

    Returns None when s is None; strings with a unit suffix are resolved
    through lookup_unit_table().
    """
    if s is None:
        return None

    text = s.strip()

    # Digits with separators only: no unit suffix to interpret
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, text)
3483
3484
def parse_resolution(s):
    """Extract video dimensions from a string.

    Recognizes, in this order, 'WIDTHxHEIGHT', '<height>p' / '<height>i' and
    '4k' / '8k' (mapped to 540 lines per unit). Returns a dict with 'width'
    and/or 'height', or an empty dict when s is None or nothing matches.
    """
    if s is None:
        return {}

    dims = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dims is not None:
        return {
            'width': int(dims.group('w')),
            'height': int(dims.group('h')),
        }

    lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if lines is not None:
        return {'height': int(lines.group(1))}

    marketing = re.search(r'\b([48])[kK]\b', s)
    if marketing is not None:
        return {'height': int(marketing.group(1)) * 540}

    return {}
3505
3506
def parse_bitrate(s):
    """Return the integer preceding 'kbps' in s.

    Returns None when s is not a compat_str or contains no such number.
    """
    if not isinstance(s, compat_str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    if found is None:
        return None
    return int(found.group(1))
3513
3514
def month_by_name(name, lang='en'):
    """ Return the number of a month by its (locale-independent) name

    The names are looked up in MONTH_NAMES for lang, falling back to the
    English names for an unknown lang. Returns None for an unknown name.
    """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name in names:
        return names.index(name) + 1
    return None
3524
3525
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations, i.e. the first three letters of its name.

        Returns None for an unknown abbreviation. """

    for number, month in enumerate(ENGLISH_MONTH_NAMES, 1):
        if month[:3] == abbrev:
            return number
    return None
3534
3535
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start an entity by '&amp;' in XML

    The five predefined entities and numeric character references with up to
    four digits are left untouched."""
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
3542
3543
def setproctitle(title):
    """Set the process name through prctl() of libc.so.6, best effort.

    title -- the new name as a compat_str.

    Does nothing on Jython, when libc.so.6 cannot be loaded or when the
    loaded library has no prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initializing the buffer from the bytes allocates one extra byte and
    # NUL-terminates it, so prctl never reads past the end of the title
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
3568
3569
def remove_start(s, start):
    """Return s without the prefix start; s is returned unchanged when it is
    None or does not begin with start."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3572
3573
def remove_end(s, end):
    """Return s without the suffix end; s is returned unchanged when it is
    None, when end is empty or when s does not finish with end."""
    # An empty suffix must be special-cased: s[:-0] would be the empty string
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
3576
3577
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s.

    None and strings shorter than two characters are returned unchanged.
    """
    if s is None or len(s) < 2:
        return s
    opening, closing = s[0], s[-1]
    if opening == closing and opening in ('"', "'"):
        return s[1:-1]
    return s
3585
3586
def get_domain(url):
    """Return the host part of url without scheme and leading 'www.'.

    Returns None when url contains no dotted host name at its start.
    """
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
3590
3591
def url_basename(url):
    """Return the last segment of the path of url, ignoring surrounding
    slashes."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
3595
3596
def base_url(url):
    """Return url up to and including the last '/' before any '?', '#' or '&'.

    Raises AttributeError when url is not an http(s) URL of that shape.
    """
    prefix = re.match(r'https?://[^?#&]+/', url)
    return prefix.group()
3599
3600
def urljoin(base, path):
    """Join path onto base, tolerating byte strings and invalid input.

    Returns path itself when it already starts with '//' or '<scheme>://',
    and None when path is empty or not a string, or when base is not an
    http(s) or protocol-relative URL string.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None

    # Already absolute (or protocol-relative): nothing to join
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    base_is_usable = isinstance(base, compat_str) and re.match(
        r'^(?:https?:)?//', base)
    if not base_is_usable:
        return None
    return compat_urlparse.urljoin(base, path)
3614
3615
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
3619
3620
class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
3624
3625
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, returning default when that is not possible.

    v        -- the value to convert; None and '' yield default.
    scale    -- the result is floor-divided by this value.
    default  -- returned when v is missing or cannot be converted.
    get_attr -- when set, the attribute of this name is read from v first.
    invscale -- the result is multiplied by this value before dividing.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float
        return default
3638
3639
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3642
3643
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned as is; strings have ',', '.' and '+' removed
    before conversion; any other type yields None.
    """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if not isinstance(int_str, compat_str):
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)
3651
3652
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float scaled by invscale / scale.

    Returns default when v is None or cannot be converted.
    """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
3660
3661
def bool_or_none(v, default=None):
    """Return v when it is a bool, default otherwise."""
    if isinstance(v, bool):
        return v
    return default
3664
3665
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace when it is a compat_str,
    default otherwise."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3668
3669
def url_or_none(url):
    """Return url stripped of surrounding whitespace when it looks like a URL.

    Accepted are protocol-relative URLs and the http(s), rtmp*/rtmfp,
    rtsp*, mms and ftp(s) schemes; anything else yields None.
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
3675
3676
def strftime_or_none(timestamp, date_format, default=None):
    """Format a timestamp with date_format, best effort.

    timestamp   -- a UNIX timestamp (number, interpreted as UTC) or a
                   'YYYYMMDD' string.
    date_format -- strftime format for the result.
    default     -- returned when timestamp has another type or cannot be
                   converted or formatted.
    """
    datetime_object = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        # datetime_object is still None for other types -> AttributeError
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        # OverflowError/OSError: timestamp outside the range supported by
        # the platform
        return default
3687
3688
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three notations are tried in order: colon-separated
    '[[[DD:]HH:]MM:]SS[.fraction]', a unit-suffixed form such as
    'PT1H2M3S' / '1 hour 2 min 3 s', and a single possibly fractional
    amount of hours or minutes such as '1.5 hours'.

    Returns None when s is not a string or matches none of the notations.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Colon-separated notation
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Unit-suffixed notation. Years, months and weeks are matched but
        # not captured, so they do not contribute to the result.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Single amount of hours or minutes, possibly fractional
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum up whichever components were captured
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms holds the fractional part including its leading dot
        duration += float(ms)
    return duration
3745
3746
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the existing extension of filename.

    When expected_real_ext is given and differs from the actual extension,
    ext is appended to the complete filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
3753
3754
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of filename by ext.

    When expected_real_ext is given and differs from the actual extension,
    ext is appended to the complete filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        stem = name
    else:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
3760
3761
def check_executable(exe, args=[]):
    """ Checks if the given binary can be started, and returns its name
    (False when starting it raises OSError).
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    pipe = subprocess.PIPE
    try:
        process_communicate_or_kill(
            subprocess.Popen(command, stdout=pipe, stderr=pipe))
    except OSError:
        return False
    return exe
3771
3772
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present

    version_re and unrecognized are handed to detect_exe_version(), which
    inspects the combined stdout/stderr output of the executable. """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dlc is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        output = process_communicate_or_kill(subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT))[0]
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3790
3791
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from the output of an executable.

    output       -- the text to search, as a compat_str.
    version_re   -- regex whose first group is the version; by default the
                    token following the word 'version'.
    unrecognized -- returned when the regex does not match.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
3801
3802
class PagedList(object):
    """Base class for paged lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        entries = self.getslice()
        return len(entries)
3807
3808
class OnDemandPagedList(PagedList):
    """Paged list whose pages are fetched lazily through pagefunc.

    pagefunc(pagenum) must return an iterable with the entries of that page,
    pagesize is the number of entries on a full page, and use_cache keeps
    every fetched page in a dict so it can be reused by later calls.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc, self._pagesize = pagefunc, pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Collect the entries from index start up to (not including) end.

        With end=None pages are fetched until one comes back shorter than
        pagesize.
        NOTE(review): when end equals the first index of the page that
        contains start (e.g. start == end == 0) the whole page is kept and
        fetching continues -- confirm callers never pass such a range.
        """
        pagesize = self._pagesize
        collected = []
        for pagenum in itertools.count(start // pagesize):
            first_index = pagenum * pagesize
            next_first_index = first_index + pagesize
            if start >= next_first_index:
                continue

            entries = self._cache.get(pagenum) if self._use_cache else None
            if entries is None:
                entries = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = entries

            # Offset of the first wanted entry inside this page
            skip = 0
            if first_index <= start < next_first_index:
                skip = start % pagesize

            # Offset just past the last wanted entry when the slice ends here
            stop = None
            if end is not None and first_index <= end <= next_first_index:
                stop = ((end - 1) % pagesize) + 1

            if skip != 0 or stop is not None:
                entries = entries[skip:stop]
            collected.extend(entries)

            # A little optimization - a page that is not "full" is assumed to
            # be the last one, so there is no need to query again. Likewise,
            # stop early when the slice ends exactly at the page boundary.
            is_short_page = len(entries) + skip < pagesize
            if is_short_page or end == next_first_index:
                break
        return collected
3859
3860
class InAdvancePagedList(PagedList):
    """Paged list for sources whose number of pages is known up front.

    pagefunc(pagenum) must return an iterable with the entries of that page,
    pagecount is the total number of pages and pagesize the number of
    entries per page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries from index start up to (not including) end.

        With end=None every page from the one containing start up to the
        last page is fetched.
        """
        res = []
        start_page = start // self._pagesize
        # Never ask pagefunc for a page past the known page count, even when
        # the requested end lies beyond the last entry
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Entries to drop from the front of the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Entries still wanted; None means "no upper bound"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the slice
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3888
3889
def uppercase_escape(s):
    """Expand every \\UXXXXXXXX escape sequence in s into its character."""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns (text, consumed length); only the text is wanted
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
3896
3897
def lowercase_escape(s):
    """Expand every \\uXXXX escape sequence in s into its character."""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        # The decoder returns (text, consumed length); only the text is wanted
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
3904
3905
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 text strings are turned into UTF-8 bytes before quoting
    is_py2_text = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if is_py2_text:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3911
3912
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    The host part is IDNA-encoded; path, params, query and fragment go
    through escape_rfc3986().
    """
    parts = compat_urllib_parse_urlparse(url)
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment)
    )
    return escaped.geturl()
3923
3924
def read_batch_urls(batch_fd):
    """Read the URLs of a batch file, one per line, and close batch_fd.

    Byte lines are decoded as UTF-8, leading BOMs and surrounding whitespace
    are removed, and lines that are empty or start with '#', ';' or ']' are
    dropped. Returns the remaining URLs as a list.
    """
    BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')

    def clean_line(line):
        if not isinstance(line, compat_str):
            line = line.decode('utf-8', 'replace')
        for bom in BOM_UTF8:
            if line.startswith(bom):
                line = line[len(bom):]
        line = line.lstrip()
        if not line or line.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', line, maxsplit=1)[0].rstrip()

    urls = []
    with contextlib.closing(batch_fd) as fd:
        for raw_line in fd:
            cleaned = clean_line(raw_line)
            if cleaned:
                urls.append(cleaned)
    return urls
3942
3943
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3946
3947
def update_url_query(url, query):
    """Return url with the entries of query merged into its query string.

    Parameters already present in url are replaced by the ones in query.
    A falsy query returns url unchanged.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parts.query)
    merged.update(query)
    # True = doseq: list values are emitted as repeated parameters
    new_query = compat_urllib_parse_urlencode(merged, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
3956
3957
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with selected parts replaced.

    headers are merged over the headers of req, data and url fall back to
    the values of req when falsy, and query is merged into the URL. The
    result uses HEADRequest or PUTRequest when req uses that method, and it
    inherits origin_req_host, unverifiable and (when present) timeout.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    payload = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)
    method = req.get_method()
    if method == 'HEAD':
        request_class = HEADRequest
    elif method == 'PUT':
        request_class = PUTRequest
    else:
        request_class = compat_urllib_request.Request
    clone = request_class(
        target_url, data=payload, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        clone.timeout = req.timeout
    return clone
3976
3977
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns (body bytes, Content-Type header value). Raises ValueError when
    the boundary occurs inside one of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    raw_boundary = boundary.encode('ascii')
    delimiter = b'--' + raw_boundary + b'\r\n'

    chunks = []
    for name, value in data.items():
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if raw_boundary in part:
            raise ValueError('Boundary overlaps with data')
        chunks.append(delimiter)
        chunks.append(part)

    chunks.append(b'--' + raw_boundary + b'--\r\n')

    return b''.join(chunks), content_type
3998
3999
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (encoded body, Content-Type header value). A ValueError
    is raised when a specified boundary occurs inside the data; random
    boundaries are simply regenerated in that case.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-chosen boundary is used as is; a clash propagates
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # Try again with another random boundary
            continue
4028
4029
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key out of several, in d.

    For a list/tuple of keys the value of the first key that is present and
    not None is returned; with skip_false_values other falsy values are
    skipped too. default is returned when nothing qualifies. A single key
    behaves like d.get(key, default).
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for candidate in key_or_keys:
        if candidate not in d:
            continue
        value = d[candidate]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4038
4039
def try_get(src, getter, expected_type=None):
    """Apply getter (or each getter of a list/tuple in turn) to src.

    A getter that raises AttributeError, KeyError, TypeError or IndexError is
    skipped. The first result that is an instance of expected_type (or any
    result when expected_type is None) is returned; otherwise None.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for candidate in getters:
        try:
            value = candidate(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is not None and not isinstance(value, expected_type):
            continue
        return value
    return None
4051
4052
def merge_dicts(*dicts):
    """Merge dicts from left to right, earlier values taking precedence.

    None values are ignored. A later value replaces an earlier one only when
    the earlier one is an empty string and the later one a non-empty string.
    """
    result = {}
    for source in dicts:
        for key, value in source.items():
            if value is None:
                continue
            if key not in result:
                result[key] = value
                continue
            existing = result[key]
            if (isinstance(value, compat_str) and value
                    and isinstance(existing, compat_str) and not existing):
                result[key] = value
    return result
4065
4066
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as text, decoding it with encoding/errors if needed.

    The default encoding is computed once, when this function is defined.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4069
4070
# Age limit that parse_age_limit() returns for each US rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4078
4079
# Age limit that parse_age_limit() returns for each TV rating. The lookup
# there is built from the part of each key after 'TV-' and accepts the
# spellings 'TVxx', 'TV-xx' and 'TV_xx'.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4088
4089
def parse_age_limit(s):
    """Turn an age rating into a numeric age limit, or None if unknown.

    Accepts an int between 0 and 21, a string such as '18' or '18+', a key
    of US_RATINGS, or a TV rating listed in TV_PARENTAL_GUIDELINES.
    """
    # Exact type check on purpose: bool is a subclass of int
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
4104
4105
def strip_jsonp(code):
    """Strip a JSONP wrapper such as "callback(...);" and return the payload.

    Input that does not look like a JSONP call is returned unchanged.
    """
    JSONP_RE = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(JSONP_RE, r'\g<callback_data>', code)
4114
4115
def js_to_json(code, vars={}):
    """Convert a JavaScript object literal into JSON text.

    Quotes strings and bare identifiers with double quotes, drops comments,
    trailing commas and "!" operators, and rewrites hexadecimal and octal
    integers as decimal. vars maps identifier names to the replacement text
    to emit for them.
    """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Rewrites applied inside string literals; anything else is kept as is
    STRING_REWRITES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def rewrite_escape(esc):
        sequence = esc.group(0)
        return STRING_REWRITES.get(sequence, sequence)

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token == ',' or token.startswith(('/*', '//', '!')):
            # Comments, trailing commas and "!" are dropped
            return ""

        if token[0] in ("'", '"'):
            return '"%s"' % re.sub(r'(?s)\\.|"', rewrite_escape, token[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, token)
            if im:
                number = int(im.group(1), base)
                return '"%d":' % number if token.endswith(':') else '%d' % number

        if token in vars:
            return vars[token]

        return '"%s"' % token

    TOKEN_RE = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)
    return re.sub(TOKEN_RE, fix_kv, code)
4160
4161
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    Returns a function mapping a quality id to its position in quality_ids,
    or to -1 when the id is not listed. """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
4170
4171
# Output filename template stored under the 'default' key.
# NOTE(review): the consumers of this table are not visible here.
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
}
# Maps an output type name to a filename suffix string, or to None.
# NOTE(review): presumably None means "no fixed suffix for this type" --
# confirm against the code that reads this table.
OUTTMPL_TYPES = {
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4184
4185
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than length are cut so that the result, including the
    trailing '...', is length characters long. None is passed through. """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
4194
4195
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints.

    Raises ValueError when a component is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
4198
4199
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is older than limit.

    When version is empty or cannot be parsed, the answer is the opposite
    of assume_new.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        outdated = fallback
    return outdated
4207
4208
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U

    This always reports False. The zipimporter/frozen-executable check that
    used to follow the unconditional return could never run and has been
    removed. """
    return False
4216
4217
def args_to_str(args):
    """Join args into one string, shell-quoting each argument."""
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
4221
4222
def error_to_compat_str(err):
    """Return the message of err as a text string."""
    err_str = str(err)
    if sys.version_info[0] >= 3:
        return err_str
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return err_str.decode(preferredencoding())
4230
4231
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    Parameters after ';' are ignored and the match is case-insensitive, both
    for the full-type table and for the subtype table. When neither table
    knows the type, the lower-cased subtype itself is returned. None is
    passed through.
    """
    if mt is None:
        return None

    # Drop parameters first so that neither lookup below sees them and a '/'
    # inside a parameter value cannot be mistaken for the type separator
    mimetype = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mimetype)
    if ext is not None:
        return ext

    _, _, res = mimetype.rpartition('/')
    res = res.strip()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4269
4270
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into video and audio codec.

    Returns a dict with the keys 'vcodec' and 'acodec' ('none' standing for a
    missing one), or {} when nothing usable was found. The first recognized
    codec of each kind wins; unknown codecs produce a warning on stderr.
    When no codec is recognized but exactly two are listed, they are taken
    as video and audio codec in that order.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    split_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        family = full_codec.split('.')[0]
        if family in VIDEO_CODECS:
            if not vcodec:
                vcodec = full_codec
        elif family in AUDIO_CODECS:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4300
4301
def urlhandle_detect_ext(url_handle):
    """Guess the file extension of a response from its headers.

    A filename in a Content-Disposition "attachment" header is tried first;
    otherwise the extension is derived from the Content-Type header.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        found = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = determine_ext(found.group('filename'), default_ext=None) if found else None
        if ext:
            return ext

    return mimetype2ext(headers.get('Content-Type'))
4314
4315
def encode_data_uri(data, mime_type):
    """Build a base64 "data:" URI holding the bytes data as mime_type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4318
4319
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    content_limit is the age limit of the content and age_limit the one
    configured by the user; None on either side means "no restriction". """
    # No limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4328
4329
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    A leading byte order mark selects the encoding (UTF-8 otherwise). The
    result is the regex match for an initial "<" after optional whitespace,
    i.e. truthy for HTML and None otherwise. """

    # Longer marks come before their prefixes (UTF-32 LE before UTF-16 LE)
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, payload = 'utf-8', first_bytes
    for bom, bom_encoding in BOMS:
        if first_bytes.startswith(bom):
            encoding, payload = bom_encoding, first_bytes[len(bom):]
            break
    text = payload.decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
4348
4349
def determine_protocol(info_dict):
    """Work out the download protocol for info_dict.

    An explicit 'protocol' entry wins. Otherwise the URL decides: a
    rtmp/mms/rtsp prefix, then an m3u8/f4m extension, and finally the URL
    scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4370
4371
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    Columns are left-aligned and padded to the widest cell plus extraGap;
    the last column is not padded. delim inserts a row of dashes below the
    header, and hideEmpty drops the columns whose data cells are all empty.
    Returns the table as one string with a line per row. """

    def column_widths(rows):
        return [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]

    def keep_columns(row, mask):
        return [cell for (keep, cell) in zip(mask, row) if keep]

    if hideEmpty:
        # A width of 0 marks a column that has no content in any data row
        data_widths = column_widths(data)
        header_row = keep_columns(header_row, data_widths)
        data = [keep_columns(row, data_widths) for row in data]

    widths = column_widths([header_row] + data)
    if delim:
        separator = ['-' * width for width in widths]
        rows = [header_row] + [separator] + data
    else:
        rows = [header_row] + data

    padded_specs = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    format_str = ' '.join(padded_specs) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in rows)
4392
4393
4394 def _match_one(filter_part, dct):
4395     COMPARISON_OPERATORS = {
4396         '<': operator.lt,
4397         '<=': operator.le,
4398         '>': operator.gt,
4399         '>=': operator.ge,
4400         '=': operator.eq,
4401         '!=': operator.ne,
4402     }
4403     operator_rex = re.compile(r'''(?x)\s*
4404         (?P<key>[a-z_]+)
4405         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4406         (?:
4407             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
4408             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
4409             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4410         )
4411         \s*$
4412         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4413     m = operator_rex.search(filter_part)
4414     if m:
4415         op = COMPARISON_OPERATORS[m.group('op')]
4416         actual_value = dct.get(m.group('key'))
4417         if (m.group('quotedstrval') is not None
4418             or m.group('strval') is not None
4419             # If the original field is a string and matching comparisonvalue is
4420             # a number we should respect the origin of the original field
4421             # and process comparison value as a string (see
4422             # https://github.com/ytdl-org/youtube-dl/issues/11082).
4423             or actual_value is not None and m.group('intval') is not None
4424                 and isinstance(actual_value, compat_str)):
4425             if m.group('op') not in ('=', '!='):
4426                 raise ValueError(
4427                     'Operator %s does not support string values!' % m.group('op'))
4428             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4429             quote = m.group('quote')
4430             if quote is not None:
4431                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
4432         else:
4433             try:
4434                 comparison_value = int(m.group('intval'))
4435             except ValueError:
4436                 comparison_value = parse_filesize(m.group('intval'))
4437                 if comparison_value is None:
4438                     comparison_value = parse_filesize(m.group('intval') + 'B')
4439                 if comparison_value is None:
4440                     raise ValueError(
4441                         'Invalid integer value %r in filter part %r' % (
4442                             m.group('intval'), filter_part))
4443         if actual_value is None:
4444             return m.group('none_inclusive')
4445         return op(actual_value, comparison_value)
4446
4447     UNARY_OPERATORS = {
4448         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4449         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4450     }
4451     operator_rex = re.compile(r'''(?x)\s*
4452         (?P<op>%s)\s*(?P<key>[a-z_]+)
4453         \s*$
4454         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4455     m = operator_rex.search(filter_part)
4456     if m:
4457         op = UNARY_OPERATORS[m.group('op')]
4458         actual_value = dct.get(m.group('key'))
4459         return op(actual_value)
4460
4461     raise ValueError('Invalid filter part %r' % filter_part)
4462
4463
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false

    filter_str holds one or more expressions separated by '&'; all of them
    have to match, and evaluation stops at the first one that does not. """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4469
4470
def match_filter_func(filter_str):
    """Build a match filter callback from filter_str.

    The returned function takes an info dict and returns None when it passes
    the filter, or a message explaining why the video is skipped.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        # Prefer the title for the message, then the id
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4479
4480
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression into seconds (float).

    Understands plain offsets such as '12.5' or '12.5s' and clock values
    'H:MM:SS', optionally followed by a fraction after '.' or ':'. Returns
    None for empty or unrecognized input.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None
4492
4493
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode HH:MM:SS,mmm.

    Each field is truncated, not rounded, by the %d conversion.
    """
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    whole_seconds = seconds % 60
    milliseconds = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, whole_seconds, milliseconds)
4496
4497
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    Every <p> element with a usable begin time and an end time (or duration)
    becomes one SRT cue. The styling properties listed in SUPPORTED_STYLING
    are rendered as <font>, <b>, <i> and <u> tags.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError when the document contains no <p> element
    '''
    # (current namespace, [older namespaces that are rewritten to it]).
    # The rewrite is a plain bytes replacement on the raw document.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # tts:* attributes that are read from <style> elements and from inline
    # attributes; everything else is ignored
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    # Shorthand that expands the prefixes below in XPath expressions
    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> {property: value}, filled by the resolution loop below
    styles = {}
    # Style taken from <body>/<div>, applied to every element
    default_style = {}

    class TTMLPElementParser(object):
        # Parser target that renders one <p> subtree as SRT text.
        # _out is rebound per instance by "+=", but the two lists below are
        # only ever mutated in place, so they are shared by every instance.
        # NOTE(review): an applied style pushed by start() for an element
        # that opened no tags is not popped by end(); because the list is
        # shared it looks like such an entry can carry over into later
        # paragraphs -- confirm whether that is intended.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Precedence, lowest to highest: default style, referenced
                # style, inline tts:* attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties the innermost applied style already sets
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    # Record the effective style: the enclosing one plus this element's
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened by the matching start(), innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element again and feed it through the target parser
        # above; the result is the rendered text of the node
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    # Fall back to un-namespaced <p> elements
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style may reference a parent that is
    # defined later in the document, so passes are repeated until no style
    # had to be postponed.
    # NOTE(review): a parent id that never gets an entry in `styles` (it is
    # not defined, or has none of the supported properties) sets `repeat` on
    # every pass, so this loop does not appear to terminate for such input
    # -- confirm.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # The style referenced by the first <body> and the first <div> becomes
    # the default for all elements
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The index advances for skipped paragraphs too, so cue numbers can have gaps
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # No usable end: derive it from the duration or skip the cue
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4660
4661
def cli_option(params, command_option, param):
    """Build the command line arguments for an option that takes a value.

    Returns [command_option, value] when params[param] is set and [] when it
    is missing or None. Truthy values are converted to text; falsy ones are
    passed through unchanged.
    """
    value = params.get(param)
    if value is None:
        return []
    if value:
        value = compat_str(value)
    return [command_option, value]
4667
4668
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argv fragment for a boolean command-line option.

    Returns an empty list when ``param`` is missing from ``params`` (or None).
    Otherwise the boolean is rendered as ``true_value`` / ``false_value`` and
    returned either as ``[command_option, rendered]`` or, when a non-empty
    ``separator`` is given, as the single item
    ``[command_option + separator + rendered]``.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4677
4678
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return ``[command_option]`` when ``params[param]`` equals ``expected_value``.

    In every other case (including a missing parameter) an empty list is
    returned.
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4682
4683
def cli_configuration_args(params, arg_name, key, default=[], exe=None):  # returns arg, for_compat
    """Resolve the extra command-line arguments configured under ``arg_name``.

    ``params.get(arg_name, {})`` is interpreted as follows:

    * list/tuple (legacy form): returned unchanged, with ``for_compat`` True;
    * None: a copy of ``default`` is returned;
    * dict: entries are looked up by lower-cased name.  ``'<key>+<exe>'`` is
      tried first; failing that, the ``<exe>`` entry and the ``<key>`` entry
      are both consulted and concatenated (key arguments first).  Only when
      neither is set is the ``'default'`` entry (or ``default``) used.

    Returns a ``(args, for_compat)`` tuple.  Except for the legacy form,
    ``args`` is always a fresh list.
    """
    argdict = params.get(arg_name, {})
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        return argdict, True

    if argdict is None:
        # Return a copy: handing out ``default`` itself would let a caller
        # that extends the result in place corrupt the shared default list
        # for every later call.
        return list(default or []), False
    assert isinstance(argdict, dict)

    assert isinstance(key, compat_str)
    key = key.lower()

    args = exe_args = None
    if exe is not None:
        assert isinstance(exe, compat_str)
        exe = exe.lower()
        args = argdict.get('%s+%s' % (key, exe))
        if args is None:
            exe_args = argdict.get(exe)

    if args is None:
        # When key and exe coincide the entry has already been read as
        # exe_args; do not count it twice.
        args = argdict.get(key) if key != exe else None
    if args is None and exe_args is None:
        args = argdict.get('default', default)

    args, exe_args = args or [], exe_args or []
    assert isinstance(args, (list, tuple))
    assert isinstance(exe_args, (list, tuple))
    # Normalise to lists: a tuple cannot be concatenated with a list.
    return list(args) + list(exe_args), False
4713
4714
class ISO639Utils(object):
    """Conversion between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of ``code`` are looked up; None is
        returned for unknown codes.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter key (in map iteration order) whose value
        equals ``code``, or None when there is none.
        """
        return next(
            (short_name for short_name, long_name in cls._lang_map.items()
             if long_name == code),
            None)
4918
4919
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the full name

        The lookup is case-insensitive; None is returned for unknown codes.
        """
        country_code = code.upper()
        return cls._country_map.get(country_code)
5178
5179
class GeoUtils(object):
    """Helpers for picking random IPv4 addresses from per-country blocks."""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as text) from an address block.

        A two-character argument is treated as a country code and resolved
        through ``_country_ip_map`` (case-insensitively); None is returned
        when the code has no entry.  Any other argument is taken to be an
        ``address/prefix-length`` block itself.
        """
        block = code_or_block
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        network, prefix_len = block.split('/')
        lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
        # Setting every host bit gives the highest address of the block.
        highest = lowest | (0xffffffff >> int(prefix_len))
        picked = random.randint(lowest, highest)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', picked)))
5438
5439
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets an individual request choose its own proxy.

    A request selects a proxy through the ``Ytdl-request-proxy`` header; the
    special value ``__noproxy__`` disables proxying for that request.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        # They are installed before the base initialiser runs, which is then
        # given the configured proxies.
        proxy_open = self.proxy_open
        for scheme in ('http', 'https'):
            setattr(
                self, scheme + '_open',
                lambda r, proxy='__noproxy__', scheme=scheme, meth=proxy_open:
                    meth(r, proxy, scheme))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Pick the proxy for ``req``, honouring a per-request override.

        Returns None when no proxy applies or when a SOCKS proxy was selected
        (it is recorded in the ``Ytdl-socks-proxy`` header instead); otherwise
        defers to the base class implementation.
        """
        per_request = req.headers.get('Ytdl-request-proxy')
        if per_request is not None:
            # The override header is internal: remove it from the request.
            del req.headers['Ytdl-request-proxy']
            proxy = per_request

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dlc's http/https handlers do wrapping the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5463
5464
5465 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5466 # released into Public Domain
5467 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5468
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Leading zero bytes are dropped; zero and negative values produce a single
    zero byte.  If optional blocksize is given and greater than zero, the
    front of the byte string is padded with binary zeros so that the length
    is a multiple of blocksize.
    """
    n = int(n)
    words = []
    # Emit 32-bit words, least significant first.
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    # Most significant word first, without leading zero bytes; fall back to a
    # single zero byte when nothing remains.
    s = b''.join(reversed(words)).lstrip(b'\000') or b'\000'
    if blocksize > 0:
        remainder = len(s) % blocksize
        if remainder:
            s = b'\000' * (blocksize - remainder) + s
    return s
5497
5498
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes up to a whole number of 32-bit words.
    padding = -len(s) % 4
    if padding:
        s = b'\000' * padding + s
    acc = 0
    for offset in range(0, len(s), 4):
        (word,) = compat_struct_unpack('>I', s[offset:offset + 4])
        acc = (acc << 32) + word
    return acc
5514
5515
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The input bytes are reversed before being read as one big integer, which
    is then raised to ``exponent`` modulo ``modulus``.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    ciphertext = pow(plaintext, exponent, modulus)
    return '%x' % ciphertext
5531
5532
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout ``[0, 2] + padding + [0] + data``, where the
    padding is made of random non-zero values.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if fewer than 8 padding values would fit
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding values must be non-zero: the first zero after the [0, 2] header
    # marks the end of the padding, so a zero inside it would make the
    # receiver cut the message at the wrong place.
    # NOTE(review): `random` is not a cryptographically secure source.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    # list() keeps the result a plain list for any sequence of ints.
    return [0, 2] + pseudo_random + [0] + list(data)
5546
5547
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer ``num`` in base ``n``.

    ``table`` supplies the digit characters; by default the first ``n``
    characters of ``0-9a-zA-Z`` are used.

    Raises ValueError when ``n`` exceeds the table length, when ``num`` is
    negative, or when a non-zero ``num`` is combined with a base below 2.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division never brings a negative number to zero, and a base below
    # 2 never shrinks the number (or divides by zero): refuse such input
    # instead of looping forever.
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small to encode %d' % (n, num))

    digits = []  # least significant digit first
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5564
5565
def decode_packed_codes(code):
    """Expand code matched by PACKED_CODES_RE back into readable source.

    The match must provide four groups: the obfuscated code, the numeric
    base, the symbol count and the ``|``-separated symbol list.  Every word
    of the obfuscated code that is the base-n encoding of a symbol index is
    replaced by that symbol (or kept when the symbol is empty).
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')
    symbol_table = {}

    while count:
        count -= 1
        base_n_count = encode_base_n(count, base)
        # An empty symbol means the word stands for itself.
        symbol_table[base_n_count] = symbols[count] or base_n_count

    # Words without a table entry are not packed symbols: keep them verbatim
    # instead of failing with KeyError.
    return re.sub(
        r'\b(\w+)\b',
        lambda word: symbol_table.get(word.group(0), word.group(0)),
        obfuscated_code)
5582
5583
def caesar(s, alphabet, shift):
    """Apply a Caesar shift to ``s`` over the given ``alphabet``.

    Every character found in ``alphabet`` is replaced by the one ``shift``
    positions further on (wrapping around); all other characters are kept.
    A shift of 0 returns ``s`` itself.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for ch in s:
        if ch in alphabet:
            ch = alphabet[(alphabet.index(ch) + shift) % size]
        shifted.append(ch)
    return ''.join(shifted)
5591
5592
def rot47(s):
    """Apply ROT47 to ``s``: a Caesar shift by 47 over the 94 characters
    from ``!`` to ``~``."""
    alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, alphabet, 47)
5595
5596
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated ``KEY=value`` attribute list into a dict.

    Values may be bare or double-quoted; the surrounding quotes of a quoted
    value are removed.  When a key occurs more than once, the last one wins.
    """
    info = {}
    pattern = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    for match in re.finditer(pattern, attrib):
        name, raw = match.group('key', 'val')
        info[name] = raw[1:-1] if raw.startswith('"') else raw
    return info
5604
5605
def urshift(val, n):
    """Shift ``val`` right by ``n`` bits without sign extension.

    A negative ``val`` is first increased by 2**32, i.e. read as an unsigned
    32-bit quantity.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
5608
5609
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """
    Decode the pixel data of a PNG image.

    The scanlines are read with a fixed layout of 3 bytes per pixel; apart
    from width and height no IHDR field is inspected.

    Returns a tuple `(width, height, pixels)` where `pixels` is a list of
    `height` rows, each a flat list of `width * 3` byte values.

    Raises IOError if the PNG signature or the leading IHDR chunk is missing,
    or if the file contains no IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    remaining = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or remaining[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    def _read_uint(raw):
        # Big-endian unsigned integer of 1, 2 or 4 bytes
        return compat_struct_unpack({1: '>B', 2: '>H', 4: '>I'}[len(raw)], raw)[0]

    def _paeth(a, b, c):
        # Pick whichever of left (a), up (b), upper-left (c) is closest to a + b - c
        estimate = a + b - c
        dist_a = abs(estimate - a)
        dist_b = abs(estimate - b)
        dist_c = abs(estimate - c)
        if dist_a <= dist_b and dist_a <= dist_c:
            return a
        if dist_b <= dist_c:
            return b
        return c

    # Each chunk is: 4-byte length, 4-byte type, `length` bytes of data, 4-byte CRC
    chunks = []
    while remaining:
        size = _read_uint(remaining[:4])
        chunks.append((remaining[4:8], remaining[8:8 + size]))
        remaining = remaining[8 + size + 4:]  # the trailing CRC is skipped unchecked

    ihdr = chunks[0][1]
    width = _read_uint(ihdr[:4])
    height = _read_uint(ihdr[4:8])

    idat = b''.join(data for chunk_type, data in chunks if chunk_type == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    scanlines = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []
    previous_row = None

    for row_index in range(height):
        # Every scanline is prefixed by a single filter-type byte
        row_start = row_index * (stride + 1)
        filter_type = scanlines[row_start]

        row = []
        pixels.append(row)

        for col in range(stride):
            value = scanlines[row_start + 1 + col]
            # Neighbours are the same colour channel of the pixel to the left
            # and of the pixel above; missing neighbours count as 0.
            left = row[col - 3] if col > 2 else 0
            up = previous_row[col] if row_index > 0 else 0

            if filter_type == 1:  # Sub
                value = (value + left) & 0xff
            elif filter_type == 2:  # Up
                value = (value + up) & 0xff
            elif filter_type == 3:  # Average
                value = (value + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = previous_row[col - 3] if col > 2 and row_index > 0 else 0
                value = (value + _paeth(left, up, upper_left)) & 0xff

            row.append(value)

        previous_row = row

    return width, height, pixels
5715
5716
def write_xattr(path, key, value):
    """
    Store the extended attribute `key` = `value` on the file at `path`.

    The first available backend is used:
      1. a Python module importable as `xattr` (pyxattr or xattr),
      2. NTFS Alternate Data Streams when compat_os_name is 'nt',
      3. the `setfattr` or `xattr` command line tools.

    `value` is written as is by the first two backends and decoded as UTF-8
    before being passed to a command line tool.

    Raises XAttrMetadataError when writing the attribute fails and
    XAttrUnavailableError when no usable backend is found (or the installed
    pyxattr is too old).
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            installed_version = xattr.__version__
            if version_tuple(installed_version) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dlc requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, installed_version))

            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            try:
                with open(path + ':' + key, 'wb') as stream:
                    stream.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        user_has_setfattr = check_executable('setfattr', ['--version'])
        user_has_xattr = check_executable('xattr', ['-h'])

        if not (user_has_setfattr or user_has_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        value = value.decode('utf-8')
        # setfattr is preferred when both tools are installed
        if user_has_setfattr:
            executable, opts = 'setfattr', ['-n', key, '-v', value]
        else:
            executable, opts = 'xattr', ['-w', key, value]

        cmd = [encodeFilename(executable, True)]
        cmd.extend(encodeArgument(o) for o in opts)
        cmd.append(encodeFilename(path, True))

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        _, stderr = process_communicate_or_kill(p)
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
5799
5800
def random_birthday(year_field, month_field, day_field):
    """
    Pick a random date between 1950-01-01 and 1995-12-31 (both inclusive).

    Returns a dict that maps the three given field names to the year, month
    and day of that date, each converted with str().
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    day_span = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, day_span))

    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict((field, str(part)) for field, part in zip(field_names, date_parts))
5811
5812
# Templates for internet shortcut files, which are plain text files.
# Each template contains %-style named placeholders (`%(url)s`, `%(filename)s`).
# The literals start on the line after the opening quotes for readability;
# `.lstrip()` removes that leading newline again.

# INI-style "[InternetShortcut]" file. Placeholder: `url`.
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# XML property list (note the Apple plist DOCTYPE). Placeholder: `url`.
# The literal is not a raw string, so each `\t` below is a real tab character.
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# "[Desktop Entry]" file of type Link. Placeholders: `filename` and `url`.
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5838
5839
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    Unicode hostnames are converted to Punycode. An explicit port 80 is dropped for the `http` scheme only, since it is the default port of that scheme alone. IRIs without a hostname (e.g. relative references) are supported; only their remaining components are escaped.

    Raises ValueError if the network location contains a bracketed (IPv6) host.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    # Parsing a network location with only one bracket also raises a ValueError.
    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no network location (e.g. '/some/path'); there is nothing to encode then.
    hostname = iri_parts.hostname
    if hostname:
        # Punycode for Unicode hostnames. The 'idna' encoding produces ASCII text.
        net_location += hostname.encode('idna').decode('utf-8')

    # Only omit the port when it is the default of the scheme in use: removing ':80' from e.g. an https URL would make it point to port 443 instead.
    port = iri_parts.port
    is_default_http_port = port == 80 and iri_parts.scheme == 'http'
    if port is not None and not is_default_http_port:
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5882
5883
def to_high_limit_path(path):
    """
    Return `path` in a form that is not subject to the MAX_PATH limitation.

    On 'win32' and 'cygwin' platforms the absolute path is prefixed with
    `\\\\?\\`; on every other platform `path` is returned unchanged.
    """
    if sys.platform not in ('win32', 'cygwin'):
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    # A raw string cannot end with a backslash, hence the padding space that is stripped again.
    extended_length_prefix = r'\\?\ '.rstrip()
    return extended_length_prefix + os.path.abspath(path)
5890
5891
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """
    Look up `field` in `obj` and format it with `template`.

    obj:      mapping providing `.get()`
    field:    key to look up; `default` is used when it is missing
    template: %-format string applied to the (possibly converted) value
    ignore:   values that are considered absent
    default:  result returned for absent values
    func:     optional converter applied to the value before formatting

    Returns `default` when the value, before or after conversion by `func`,
    is one of `ignore`; otherwise the formatted string.
    """
    value = obj.get(field, default)
    if value in ignore:
        return default

    if func:
        value = func(value)
        if value in ignore:
            return default

    return template % value
5897
5898
def clean_podcast_url(url):
    """Return `url` with every occurrence of the known podcast tracking/redirect prefixes listed in the pattern removed."""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
5914
5915
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """
    Generate a random version 4 UUID in its canonical textual form.

    In the template every `x` becomes a random hexadecimal digit, the literal
    `4` is the version number and `y` becomes the variant digit, which is
    restricted to 8, 9, a or b (its two most significant bits are `10`).
    """
    def _random_digit(placeholder):
        value = random.randint(0, 15)
        if placeholder.group(0) == 'y':
            # Keep the two low bits random and force the high bits to `10`
            value = (value & 0x3) | 0x8
        return _HEX_TABLE[value]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
5921
5922
def make_dir(path, to_screen=None):
    """
    Make sure the directory that is going to contain `path` exists.

    path:      path of a file; only its directory part is created
               (including missing parent directories)
    to_screen: optional callable that receives an error message when the
               directory cannot be created

    Returns True if the directory exists afterwards or `path` has no
    directory part, False if creating it failed.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Reporting is optional: `to_screen` defaults to None and must not be
        # called unless an actual callable was supplied.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
5933
5934
def get_executable_path():
    """
    Return the absolute base directory of the running program.

    When `sys.argv[0]` is the interpreter executable itself (a packaged
    build) this is the directory containing it; otherwise it is the parent
    of the directory containing `sys.argv[0]`.
    """
    launcher = sys.argv[0]
    base_dir = os.path.dirname(launcher)
    is_packaged = os.path.abspath(launcher) == os.path.abspath(sys.executable)
    if not is_packaged:
        base_dir = os.path.join(base_dir, '..')
    return os.path.abspath(base_dir)
5940
5941
def load_plugins(name, type, namespace):
    """
    Load plugin classes from the `ytdlp_plugins` directory.

    The module `name` is looked up in the `ytdlp_plugins` directory below
    get_executable_path() and loaded. Every attribute of it whose name ends
    with `type` is stored in the `namespace` mapping under its own name.

    Returns the list of collected attributes. An ImportError (for instance
    when the module does not exist) is ignored.
    """
    plugin_info = [None]
    classes = []
    try:
        search_dirs = [os.path.join(get_executable_path(), 'ytdlp_plugins')]
        plugin_info = imp.find_module(name, search_dirs)
        plugins = imp.load_module(name, *plugin_info)
        for attr_name in dir(plugins):
            if attr_name.endswith(type):
                klass = getattr(plugins, attr_name)
                classes.append(klass)
                namespace[attr_name] = klass
    except ImportError:
        pass
    finally:
        # The first item returned by find_module() is an open file (or None)
        module_file = plugin_info[0]
        if module_file is not None:
            module_file.close()
    return classes
5961
5962
def traverse_dict(dictn, keys, casesense=True):
    """
    Follow the sequence `keys` through nested dicts starting at `dictn`.

    With `casesense` set to False both the dict keys and the looked-up keys
    are lower-cased before they are compared.

    Returns the value found under the last key, or None as soon as a key is
    missing or a non-dict is reached before all keys have been used.
    """
    current = dictn
    depth = 0
    while True:
        if not isinstance(current, dict):
            return None

        wanted = keys[depth]
        if not casesense:
            current = {key.lower(): val for key, val in current.items()}
            wanted = wanted.lower()

        current = current.get(wanted, None)
        depth += 1
        if depth >= len(keys):
            return current