]> jfr.im git - yt-dlp.git/blob - youtube_dlc/utils.py
Merge 'ytdl-org/youtube-dl/master' release 2020.11.19
[yt-dlp.git] / youtube_dlc / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import io
20 import itertools
21 import json
22 import locale
23 import math
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import socket
30 import ssl
31 import subprocess
32 import sys
33 import tempfile
34 import time
35 import traceback
36 import xml.etree.ElementTree
37 import zlib
38
39 from .compat import (
40 compat_HTMLParseError,
41 compat_HTMLParser,
42 compat_basestring,
43 compat_chr,
44 compat_cookiejar,
45 compat_ctypes_WINFUNCTYPE,
46 compat_etree_fromstring,
47 compat_expanduser,
48 compat_html_entities,
49 compat_html_entities_html5,
50 compat_http_client,
51 compat_integer_types,
52 compat_kwargs,
53 compat_os_name,
54 compat_parse_qs,
55 compat_shlex_quote,
56 compat_str,
57 compat_struct_pack,
58 compat_struct_unpack,
59 compat_urllib_error,
60 compat_urllib_parse,
61 compat_urllib_parse_urlencode,
62 compat_urllib_parse_urlparse,
63 compat_urllib_parse_unquote_plus,
64 compat_urllib_request,
65 compat_urlparse,
66 compat_xpath,
67 )
68
69 from .socks import (
70 ProxyType,
71 sockssocket,
72 )
73
74
def register_socks_protocols():
    """Teach the urlparse machinery that SOCKS URLs carry a netloc.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly, so each SOCKS scheme
    is appended to that registry (once) before any proxy URL is parsed.
    """
    netloc_schemes = compat_urlparse.uses_netloc
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto not in netloc_schemes:
            netloc_schemes.append(proto)
82
83
# Type of a compiled regular expression, for isinstance() checks against
# arguments that may be either a pattern string or a pre-compiled regex.
# This is not clearly defined otherwise (re.Pattern only exists on newer
# Python 3 versions, and this codebase still targets Python 2 as well).
compiled_regex_type = type(re.compile(''))
86
87
def random_user_agent():
    """Return a randomized but realistic desktop Chrome User-Agent string.

    A Chrome version number is drawn uniformly at random from a fixed list
    of historical releases and substituted into a Windows 10 x64 Chrome
    UA template, so repeated calls yield varying yet plausible values.
    """
    template = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Historical Chrome release versions (newest first, roughly interleaved
    # across the stable/beta/dev channels of the 68-76 era).
    chrome_versions = (
        '74.0.3729.129', '76.0.3780.3', '76.0.3780.2', '74.0.3729.128', '76.0.3780.1', '76.0.3780.0',
        '75.0.3770.15', '74.0.3729.127', '74.0.3729.126', '76.0.3779.1', '76.0.3779.0', '75.0.3770.14',
        '74.0.3729.125', '76.0.3778.1', '76.0.3778.0', '75.0.3770.13', '74.0.3729.124', '74.0.3729.123',
        '73.0.3683.121', '76.0.3777.1', '76.0.3777.0', '75.0.3770.12', '74.0.3729.122', '76.0.3776.4',
        '75.0.3770.11', '74.0.3729.121', '76.0.3776.3', '76.0.3776.2', '73.0.3683.120', '74.0.3729.120',
        '74.0.3729.119', '74.0.3729.118', '76.0.3776.1', '76.0.3776.0', '76.0.3775.5', '75.0.3770.10',
        '74.0.3729.117', '76.0.3775.4', '76.0.3775.3', '74.0.3729.116', '75.0.3770.9', '76.0.3775.2',
        '76.0.3775.1', '76.0.3775.0', '75.0.3770.8', '74.0.3729.115', '74.0.3729.114', '76.0.3774.1',
        '76.0.3774.0', '75.0.3770.7', '74.0.3729.113', '74.0.3729.112', '74.0.3729.111', '76.0.3773.1',
        '76.0.3773.0', '75.0.3770.6', '74.0.3729.110', '74.0.3729.109', '76.0.3772.1', '76.0.3772.0',
        '75.0.3770.5', '74.0.3729.108', '74.0.3729.107', '76.0.3771.1', '76.0.3771.0', '75.0.3770.4',
        '74.0.3729.106', '74.0.3729.105', '75.0.3770.3', '74.0.3729.104', '74.0.3729.103', '74.0.3729.102',
        '75.0.3770.2', '74.0.3729.101', '75.0.3770.1', '75.0.3770.0', '74.0.3729.100', '75.0.3769.5',
        '75.0.3769.4', '74.0.3729.99', '75.0.3769.3', '75.0.3769.2', '75.0.3768.6', '74.0.3729.98',
        '75.0.3769.1', '75.0.3769.0', '74.0.3729.97', '73.0.3683.119', '73.0.3683.118', '74.0.3729.96',
        '75.0.3768.5', '75.0.3768.4', '75.0.3768.3', '75.0.3768.2', '74.0.3729.95', '74.0.3729.94',
        '75.0.3768.1', '75.0.3768.0', '74.0.3729.93', '74.0.3729.92', '73.0.3683.117', '74.0.3729.91',
        '75.0.3766.3', '74.0.3729.90', '75.0.3767.2', '75.0.3767.1', '75.0.3767.0', '74.0.3729.89',
        '73.0.3683.116', '75.0.3766.2', '74.0.3729.88', '75.0.3766.1', '75.0.3766.0', '74.0.3729.87',
        '73.0.3683.115', '74.0.3729.86', '75.0.3765.1', '75.0.3765.0', '74.0.3729.85', '73.0.3683.114',
        '74.0.3729.84', '75.0.3764.1', '75.0.3764.0', '74.0.3729.83', '73.0.3683.113', '75.0.3763.2',
        '75.0.3761.4', '74.0.3729.82', '75.0.3763.1', '75.0.3763.0', '74.0.3729.81', '73.0.3683.112',
        '75.0.3762.1', '75.0.3762.0', '74.0.3729.80', '75.0.3761.3', '74.0.3729.79', '73.0.3683.111',
        '75.0.3761.2', '74.0.3729.78', '74.0.3729.77', '75.0.3761.1', '75.0.3761.0', '73.0.3683.110',
        '74.0.3729.76', '74.0.3729.75', '75.0.3760.0', '74.0.3729.74', '75.0.3759.8', '75.0.3759.7',
        '75.0.3759.6', '74.0.3729.73', '75.0.3759.5', '74.0.3729.72', '73.0.3683.109', '75.0.3759.4',
        '75.0.3759.3', '74.0.3729.71', '75.0.3759.2', '74.0.3729.70', '73.0.3683.108', '74.0.3729.69',
        '75.0.3759.1', '75.0.3759.0', '74.0.3729.68', '73.0.3683.107', '74.0.3729.67', '75.0.3758.1',
        '75.0.3758.0', '74.0.3729.66', '73.0.3683.106', '74.0.3729.65', '75.0.3757.1', '75.0.3757.0',
        '74.0.3729.64', '73.0.3683.105', '74.0.3729.63', '75.0.3756.1', '75.0.3756.0', '74.0.3729.62',
        '73.0.3683.104', '75.0.3755.3', '75.0.3755.2', '73.0.3683.103', '75.0.3755.1', '75.0.3755.0',
        '74.0.3729.61', '73.0.3683.102', '74.0.3729.60', '75.0.3754.2', '74.0.3729.59', '75.0.3753.4',
        '74.0.3729.58', '75.0.3754.1', '75.0.3754.0', '74.0.3729.57', '73.0.3683.101', '75.0.3753.3',
        '75.0.3752.2', '75.0.3753.2', '74.0.3729.56', '75.0.3753.1', '75.0.3753.0', '74.0.3729.55',
        '73.0.3683.100', '74.0.3729.54', '75.0.3752.1', '75.0.3752.0', '74.0.3729.53', '73.0.3683.99',
        '74.0.3729.52', '75.0.3751.1', '75.0.3751.0', '74.0.3729.51', '73.0.3683.98', '74.0.3729.50',
        '75.0.3750.0', '74.0.3729.49', '74.0.3729.48', '74.0.3729.47', '75.0.3749.3', '74.0.3729.46',
        '73.0.3683.97', '75.0.3749.2', '74.0.3729.45', '75.0.3749.1', '75.0.3749.0', '74.0.3729.44',
        '73.0.3683.96', '74.0.3729.43', '74.0.3729.42', '75.0.3748.1', '75.0.3748.0', '74.0.3729.41',
        '75.0.3747.1', '73.0.3683.95', '75.0.3746.4', '74.0.3729.40', '74.0.3729.39', '75.0.3747.0',
        '75.0.3746.3', '75.0.3746.2', '74.0.3729.38', '75.0.3746.1', '75.0.3746.0', '74.0.3729.37',
        '73.0.3683.94', '75.0.3745.5', '75.0.3745.4', '75.0.3745.3', '75.0.3745.2', '74.0.3729.36',
        '75.0.3745.1', '75.0.3745.0', '75.0.3744.2', '74.0.3729.35', '73.0.3683.93', '74.0.3729.34',
        '75.0.3744.1', '75.0.3744.0', '74.0.3729.33', '73.0.3683.92', '74.0.3729.32', '74.0.3729.31',
        '73.0.3683.91', '75.0.3741.2', '75.0.3740.5', '74.0.3729.30', '75.0.3741.1', '75.0.3741.0',
        '74.0.3729.29', '75.0.3740.4', '73.0.3683.90', '74.0.3729.28', '75.0.3740.3', '73.0.3683.89',
        '75.0.3740.2', '74.0.3729.27', '75.0.3740.1', '75.0.3740.0', '74.0.3729.26', '73.0.3683.88',
        '73.0.3683.87', '74.0.3729.25', '75.0.3739.1', '75.0.3739.0', '73.0.3683.86', '74.0.3729.24',
        '73.0.3683.85', '75.0.3738.4', '75.0.3738.3', '75.0.3738.2', '75.0.3738.1', '75.0.3738.0',
        '74.0.3729.23', '73.0.3683.84', '74.0.3729.22', '74.0.3729.21', '75.0.3737.1', '75.0.3737.0',
        '74.0.3729.20', '73.0.3683.83', '74.0.3729.19', '75.0.3736.1', '75.0.3736.0', '74.0.3729.18',
        '73.0.3683.82', '74.0.3729.17', '75.0.3735.1', '75.0.3735.0', '74.0.3729.16', '73.0.3683.81',
        '75.0.3734.1', '75.0.3734.0', '74.0.3729.15', '73.0.3683.80', '74.0.3729.14', '75.0.3733.1',
        '75.0.3733.0', '75.0.3732.1', '74.0.3729.13', '74.0.3729.12', '73.0.3683.79', '74.0.3729.11',
        '75.0.3732.0', '74.0.3729.10', '73.0.3683.78', '74.0.3729.9', '74.0.3729.8', '74.0.3729.7',
        '75.0.3731.3', '75.0.3731.2', '75.0.3731.0', '74.0.3729.6', '73.0.3683.77', '73.0.3683.76',
        '75.0.3730.5', '75.0.3730.4', '73.0.3683.75', '74.0.3729.5', '73.0.3683.74', '75.0.3730.3',
        '75.0.3730.2', '74.0.3729.4', '73.0.3683.73', '73.0.3683.72', '75.0.3730.1', '75.0.3730.0',
        '74.0.3729.3', '73.0.3683.71', '74.0.3729.2', '73.0.3683.70', '74.0.3729.1', '74.0.3729.0',
        '74.0.3726.4', '73.0.3683.69', '74.0.3726.3', '74.0.3728.0', '74.0.3726.2', '73.0.3683.68',
        '74.0.3726.1', '74.0.3726.0', '74.0.3725.4', '73.0.3683.67', '73.0.3683.66', '74.0.3725.3',
        '74.0.3725.2', '74.0.3725.1', '74.0.3724.8', '74.0.3725.0', '73.0.3683.65', '74.0.3724.7',
        '74.0.3724.6', '74.0.3724.5', '74.0.3724.4', '74.0.3724.3', '74.0.3724.2', '74.0.3724.1',
        '74.0.3724.0', '73.0.3683.64', '74.0.3723.1', '74.0.3723.0', '73.0.3683.63', '74.0.3722.1',
        '74.0.3722.0', '73.0.3683.62', '74.0.3718.9', '74.0.3702.3', '74.0.3721.3', '74.0.3721.2',
        '74.0.3721.1', '74.0.3721.0', '74.0.3720.6', '73.0.3683.61', '72.0.3626.122', '73.0.3683.60',
        '74.0.3720.5', '72.0.3626.121', '74.0.3718.8', '74.0.3720.4', '74.0.3720.3', '74.0.3718.7',
        '74.0.3720.2', '74.0.3720.1', '74.0.3720.0', '74.0.3718.6', '74.0.3719.5', '73.0.3683.59',
        '74.0.3718.5', '74.0.3718.4', '74.0.3719.4', '74.0.3719.3', '74.0.3719.2', '74.0.3719.1',
        '73.0.3683.58', '74.0.3719.0', '73.0.3683.57', '73.0.3683.56', '74.0.3718.3', '73.0.3683.55',
        '74.0.3718.2', '74.0.3718.1', '74.0.3718.0', '73.0.3683.54', '74.0.3717.2', '73.0.3683.53',
        '74.0.3717.1', '74.0.3717.0', '73.0.3683.52', '74.0.3716.1', '74.0.3716.0', '73.0.3683.51',
        '74.0.3715.1', '74.0.3715.0', '73.0.3683.50', '74.0.3711.2', '74.0.3714.2', '74.0.3713.3',
        '74.0.3714.1', '74.0.3714.0', '73.0.3683.49', '74.0.3713.1', '74.0.3713.0', '72.0.3626.120',
        '73.0.3683.48', '74.0.3712.2', '74.0.3712.1', '74.0.3712.0', '73.0.3683.47', '72.0.3626.119',
        '73.0.3683.46', '74.0.3710.2', '72.0.3626.118', '74.0.3711.1', '74.0.3711.0', '73.0.3683.45',
        '72.0.3626.117', '74.0.3710.1', '74.0.3710.0', '73.0.3683.44', '72.0.3626.116', '74.0.3709.1',
        '74.0.3709.0', '74.0.3704.9', '73.0.3683.43', '72.0.3626.115', '74.0.3704.8', '74.0.3704.7',
        '74.0.3708.0', '74.0.3706.7', '74.0.3704.6', '73.0.3683.42', '72.0.3626.114', '74.0.3706.6',
        '72.0.3626.113', '74.0.3704.5', '74.0.3706.5', '74.0.3706.4', '74.0.3706.3', '74.0.3706.2',
        '74.0.3706.1', '74.0.3706.0', '73.0.3683.41', '72.0.3626.112', '74.0.3705.1', '74.0.3705.0',
        '73.0.3683.40', '72.0.3626.111', '73.0.3683.39', '74.0.3704.4', '73.0.3683.38', '74.0.3704.3',
        '74.0.3704.2', '74.0.3704.1', '74.0.3704.0', '73.0.3683.37', '72.0.3626.110', '72.0.3626.109',
        '74.0.3703.3', '74.0.3703.2', '73.0.3683.36', '74.0.3703.1', '74.0.3703.0', '73.0.3683.35',
        '72.0.3626.108', '74.0.3702.2', '74.0.3699.3', '74.0.3702.1', '74.0.3702.0', '73.0.3683.34',
        '72.0.3626.107', '73.0.3683.33', '74.0.3701.1', '74.0.3701.0', '73.0.3683.32', '73.0.3683.31',
        '72.0.3626.105', '74.0.3700.1', '74.0.3700.0', '73.0.3683.29', '72.0.3626.103', '74.0.3699.2',
        '74.0.3699.1', '74.0.3699.0', '73.0.3683.28', '72.0.3626.102', '73.0.3683.27', '73.0.3683.26',
        '74.0.3698.0', '74.0.3696.2', '72.0.3626.101', '73.0.3683.25', '74.0.3696.1', '74.0.3696.0',
        '74.0.3694.8', '72.0.3626.100', '74.0.3694.7', '74.0.3694.6', '74.0.3694.5', '74.0.3694.4',
        '72.0.3626.99', '72.0.3626.98', '74.0.3694.3', '73.0.3683.24', '72.0.3626.97', '72.0.3626.96',
        '72.0.3626.95', '73.0.3683.23', '72.0.3626.94', '73.0.3683.22', '73.0.3683.21', '72.0.3626.93',
        '74.0.3694.2', '72.0.3626.92', '74.0.3694.1', '74.0.3694.0', '74.0.3693.6', '73.0.3683.20',
        '72.0.3626.91', '74.0.3693.5', '74.0.3693.4', '74.0.3693.3', '74.0.3693.2', '73.0.3683.19',
        '74.0.3693.1', '74.0.3693.0', '73.0.3683.18', '72.0.3626.90', '74.0.3692.1', '74.0.3692.0',
        '73.0.3683.17', '72.0.3626.89', '74.0.3687.3', '74.0.3691.1', '74.0.3691.0', '73.0.3683.16',
        '72.0.3626.88', '72.0.3626.87', '73.0.3683.15', '74.0.3690.1', '74.0.3690.0', '73.0.3683.14',
        '72.0.3626.86', '73.0.3683.13', '73.0.3683.12', '74.0.3689.1', '74.0.3689.0', '73.0.3683.11',
        '72.0.3626.85', '73.0.3683.10', '72.0.3626.84', '73.0.3683.9', '74.0.3688.1', '74.0.3688.0',
        '73.0.3683.8', '72.0.3626.83', '74.0.3687.2', '74.0.3687.1', '74.0.3687.0', '73.0.3683.7',
        '72.0.3626.82', '74.0.3686.4', '72.0.3626.81', '74.0.3686.3', '74.0.3686.2', '74.0.3686.1',
        '74.0.3686.0', '73.0.3683.6', '72.0.3626.80', '74.0.3685.1', '74.0.3685.0', '73.0.3683.5',
        '72.0.3626.79', '74.0.3684.1', '74.0.3684.0', '73.0.3683.4', '72.0.3626.78', '72.0.3626.77',
        '73.0.3683.3', '73.0.3683.2', '72.0.3626.76', '73.0.3683.1', '73.0.3683.0', '72.0.3626.75',
        '71.0.3578.141', '73.0.3682.1', '73.0.3682.0', '72.0.3626.74', '71.0.3578.140', '73.0.3681.4',
        '73.0.3681.3', '73.0.3681.2', '73.0.3681.1', '73.0.3681.0', '72.0.3626.73', '71.0.3578.139',
        '72.0.3626.72', '72.0.3626.71', '73.0.3680.1', '73.0.3680.0', '72.0.3626.70', '71.0.3578.138',
        '73.0.3678.2', '73.0.3679.1', '73.0.3679.0', '72.0.3626.69', '71.0.3578.137', '73.0.3678.1',
        '73.0.3678.0', '71.0.3578.136', '73.0.3677.1', '73.0.3677.0', '72.0.3626.68', '72.0.3626.67',
        '71.0.3578.135', '73.0.3676.1', '73.0.3676.0', '73.0.3674.2', '72.0.3626.66', '71.0.3578.134',
        '73.0.3674.1', '73.0.3674.0', '72.0.3626.65', '71.0.3578.133', '73.0.3673.2', '73.0.3673.1',
        '73.0.3673.0', '72.0.3626.64', '71.0.3578.132', '72.0.3626.63', '72.0.3626.62', '72.0.3626.61',
        '72.0.3626.60', '73.0.3672.1', '73.0.3672.0', '72.0.3626.59', '71.0.3578.131', '73.0.3671.3',
        '73.0.3671.2', '73.0.3671.1', '73.0.3671.0', '72.0.3626.58', '71.0.3578.130', '73.0.3670.1',
        '73.0.3670.0', '72.0.3626.57', '71.0.3578.129', '73.0.3669.1', '73.0.3669.0', '72.0.3626.56',
        '71.0.3578.128', '73.0.3668.2', '73.0.3668.1', '73.0.3668.0', '72.0.3626.55', '71.0.3578.127',
        '73.0.3667.2', '73.0.3667.1', '73.0.3667.0', '72.0.3626.54', '71.0.3578.126', '73.0.3666.1',
        '73.0.3666.0', '72.0.3626.53', '71.0.3578.125', '73.0.3665.4', '73.0.3665.3', '72.0.3626.52',
        '73.0.3665.2', '73.0.3664.4', '73.0.3665.1', '73.0.3665.0', '72.0.3626.51', '71.0.3578.124',
        '72.0.3626.50', '73.0.3664.3', '73.0.3664.2', '73.0.3664.1', '73.0.3664.0', '73.0.3663.2',
        '72.0.3626.49', '71.0.3578.123', '73.0.3663.1', '73.0.3663.0', '72.0.3626.48', '71.0.3578.122',
        '73.0.3662.1', '73.0.3662.0', '72.0.3626.47', '71.0.3578.121', '73.0.3661.1', '72.0.3626.46',
        '73.0.3661.0', '72.0.3626.45', '71.0.3578.120', '73.0.3660.2', '73.0.3660.1', '73.0.3660.0',
        '72.0.3626.44', '71.0.3578.119', '73.0.3659.1', '73.0.3659.0', '72.0.3626.43', '71.0.3578.118',
        '73.0.3658.1', '73.0.3658.0', '72.0.3626.42', '71.0.3578.117', '73.0.3657.1', '73.0.3657.0',
        '72.0.3626.41', '71.0.3578.116', '73.0.3656.1', '73.0.3656.0', '72.0.3626.40', '71.0.3578.115',
        '73.0.3655.1', '73.0.3655.0', '72.0.3626.39', '71.0.3578.114', '73.0.3654.1', '73.0.3654.0',
        '72.0.3626.38', '71.0.3578.113', '73.0.3653.1', '73.0.3653.0', '72.0.3626.37', '71.0.3578.112',
        '73.0.3652.1', '73.0.3652.0', '72.0.3626.36', '71.0.3578.111', '73.0.3651.1', '73.0.3651.0',
        '72.0.3626.35', '71.0.3578.110', '73.0.3650.1', '73.0.3650.0', '72.0.3626.34', '71.0.3578.109',
        '73.0.3649.1', '73.0.3649.0', '72.0.3626.33', '71.0.3578.108', '73.0.3648.2', '73.0.3648.1',
        '73.0.3648.0', '72.0.3626.32', '71.0.3578.107', '73.0.3647.2', '73.0.3647.1', '73.0.3647.0',
        '72.0.3626.31', '71.0.3578.106', '73.0.3635.3', '73.0.3646.2', '73.0.3646.1', '73.0.3646.0',
        '72.0.3626.30', '71.0.3578.105', '72.0.3626.29', '73.0.3645.2', '73.0.3645.1', '73.0.3645.0',
        '72.0.3626.28', '71.0.3578.104', '72.0.3626.27', '72.0.3626.26', '72.0.3626.25', '72.0.3626.24',
        '73.0.3644.0', '73.0.3643.2', '72.0.3626.23', '71.0.3578.103', '73.0.3643.1', '73.0.3643.0',
        '72.0.3626.22', '71.0.3578.102', '73.0.3642.1', '73.0.3642.0', '72.0.3626.21', '71.0.3578.101',
        '73.0.3641.1', '73.0.3641.0', '72.0.3626.20', '71.0.3578.100', '72.0.3626.19', '73.0.3640.1',
        '73.0.3640.0', '72.0.3626.18', '73.0.3639.1', '71.0.3578.99', '73.0.3639.0', '72.0.3626.17',
        '73.0.3638.2', '72.0.3626.16', '73.0.3638.1', '73.0.3638.0', '72.0.3626.15', '71.0.3578.98',
        '73.0.3635.2', '71.0.3578.97', '73.0.3637.1', '73.0.3637.0', '72.0.3626.14', '71.0.3578.96',
        '71.0.3578.95', '72.0.3626.13', '71.0.3578.94', '73.0.3636.2', '71.0.3578.93', '73.0.3636.1',
        '73.0.3636.0', '72.0.3626.12', '71.0.3578.92', '73.0.3635.1', '73.0.3635.0', '72.0.3626.11',
        '71.0.3578.91', '73.0.3634.2', '73.0.3634.1', '73.0.3634.0', '72.0.3626.10', '71.0.3578.90',
        '71.0.3578.89', '73.0.3633.2', '73.0.3633.1', '73.0.3633.0', '72.0.3610.4', '72.0.3626.9',
        '71.0.3578.88', '73.0.3632.5', '73.0.3632.4', '73.0.3632.3', '73.0.3632.2', '73.0.3632.1',
        '73.0.3632.0', '72.0.3626.8', '71.0.3578.87', '73.0.3631.2', '73.0.3631.1', '73.0.3631.0',
        '72.0.3626.7', '71.0.3578.86', '72.0.3626.6', '73.0.3630.1', '73.0.3630.0', '72.0.3626.5',
        '71.0.3578.85', '72.0.3626.4', '73.0.3628.3', '73.0.3628.2', '73.0.3629.1', '73.0.3629.0',
        '72.0.3626.3', '71.0.3578.84', '73.0.3628.1', '73.0.3628.0', '71.0.3578.83', '73.0.3627.1',
        '73.0.3627.0', '72.0.3626.2', '71.0.3578.82', '71.0.3578.81', '71.0.3578.80', '72.0.3626.1',
        '72.0.3626.0', '71.0.3578.79', '70.0.3538.124', '71.0.3578.78', '72.0.3623.4', '72.0.3625.2',
        '72.0.3625.1', '72.0.3625.0', '71.0.3578.77', '70.0.3538.123', '72.0.3624.4', '72.0.3624.3',
        '72.0.3624.2', '71.0.3578.76', '72.0.3624.1', '72.0.3624.0', '72.0.3623.3', '71.0.3578.75',
        '70.0.3538.122', '71.0.3578.74', '72.0.3623.2', '72.0.3610.3', '72.0.3623.1', '72.0.3623.0',
        '72.0.3622.3', '72.0.3622.2', '71.0.3578.73', '70.0.3538.121', '72.0.3622.1', '72.0.3622.0',
        '71.0.3578.72', '70.0.3538.120', '72.0.3621.1', '72.0.3621.0', '71.0.3578.71', '70.0.3538.119',
        '72.0.3620.1', '72.0.3620.0', '71.0.3578.70', '70.0.3538.118', '71.0.3578.69', '72.0.3619.1',
        '72.0.3619.0', '71.0.3578.68', '70.0.3538.117', '71.0.3578.67', '72.0.3618.1', '72.0.3618.0',
        '71.0.3578.66', '70.0.3538.116', '72.0.3617.1', '72.0.3617.0', '71.0.3578.65', '70.0.3538.115',
        '72.0.3602.3', '71.0.3578.64', '72.0.3616.1', '72.0.3616.0', '71.0.3578.63', '70.0.3538.114',
        '71.0.3578.62', '72.0.3615.1', '72.0.3615.0', '71.0.3578.61', '70.0.3538.113', '72.0.3614.1',
        '72.0.3614.0', '71.0.3578.60', '70.0.3538.112', '72.0.3613.1', '72.0.3613.0', '71.0.3578.59',
        '70.0.3538.111', '72.0.3612.2', '72.0.3612.1', '72.0.3612.0', '70.0.3538.110', '71.0.3578.58',
        '70.0.3538.109', '72.0.3611.2', '72.0.3611.1', '72.0.3611.0', '71.0.3578.57', '70.0.3538.108',
        '72.0.3610.2', '71.0.3578.56', '71.0.3578.55', '72.0.3610.1', '72.0.3610.0', '71.0.3578.54',
        '70.0.3538.107', '71.0.3578.53', '72.0.3609.3', '71.0.3578.52', '72.0.3609.2', '71.0.3578.51',
        '72.0.3608.5', '72.0.3609.1', '72.0.3609.0', '71.0.3578.50', '70.0.3538.106', '72.0.3608.4',
        '72.0.3608.3', '72.0.3608.2', '71.0.3578.49', '72.0.3608.1', '72.0.3608.0', '70.0.3538.105',
        '71.0.3578.48', '72.0.3607.1', '72.0.3607.0', '71.0.3578.47', '70.0.3538.104', '72.0.3606.2',
        '72.0.3606.1', '72.0.3606.0', '71.0.3578.46', '70.0.3538.103', '70.0.3538.102', '72.0.3605.3',
        '72.0.3605.2', '72.0.3605.1', '72.0.3605.0', '71.0.3578.45', '70.0.3538.101', '71.0.3578.44',
        '71.0.3578.43', '70.0.3538.100', '70.0.3538.99', '71.0.3578.42', '72.0.3604.1', '72.0.3604.0',
        '71.0.3578.41', '70.0.3538.98', '71.0.3578.40', '72.0.3603.2', '72.0.3603.1', '72.0.3603.0',
        '71.0.3578.39', '70.0.3538.97', '72.0.3602.2', '71.0.3578.38', '71.0.3578.37', '72.0.3602.1',
        '72.0.3602.0', '71.0.3578.36', '70.0.3538.96', '72.0.3601.1', '72.0.3601.0', '71.0.3578.35',
        '70.0.3538.95', '72.0.3600.1', '72.0.3600.0', '71.0.3578.34', '70.0.3538.94', '72.0.3599.3',
        '72.0.3599.2', '72.0.3599.1', '72.0.3599.0', '71.0.3578.33', '70.0.3538.93', '72.0.3598.1',
        '72.0.3598.0', '71.0.3578.32', '70.0.3538.87', '72.0.3597.1', '72.0.3597.0', '72.0.3596.2',
        '71.0.3578.31', '70.0.3538.86', '71.0.3578.30', '71.0.3578.29', '72.0.3596.1', '72.0.3596.0',
        '71.0.3578.28', '70.0.3538.85', '72.0.3595.2', '72.0.3591.3', '72.0.3595.1', '72.0.3595.0',
        '71.0.3578.27', '70.0.3538.84', '72.0.3594.1', '72.0.3594.0', '71.0.3578.26', '70.0.3538.83',
        '72.0.3593.2', '72.0.3593.1', '72.0.3593.0', '71.0.3578.25', '70.0.3538.82', '72.0.3589.3',
        '72.0.3592.2', '72.0.3592.1', '72.0.3592.0', '71.0.3578.24', '72.0.3589.2', '70.0.3538.81',
        '70.0.3538.80', '72.0.3591.2', '72.0.3591.1', '72.0.3591.0', '71.0.3578.23', '70.0.3538.79',
        '71.0.3578.22', '72.0.3590.1', '72.0.3590.0', '71.0.3578.21', '70.0.3538.78', '70.0.3538.77',
        '72.0.3589.1', '72.0.3589.0', '71.0.3578.20', '70.0.3538.76', '71.0.3578.19', '70.0.3538.75',
        '72.0.3588.1', '72.0.3588.0', '71.0.3578.18', '70.0.3538.74', '72.0.3586.2', '72.0.3587.0',
        '71.0.3578.17', '70.0.3538.73', '72.0.3586.1', '72.0.3586.0', '71.0.3578.16', '70.0.3538.72',
        '72.0.3585.1', '72.0.3585.0', '71.0.3578.15', '70.0.3538.71', '71.0.3578.14', '72.0.3584.1',
        '72.0.3584.0', '71.0.3578.13', '70.0.3538.70', '72.0.3583.2', '71.0.3578.12', '72.0.3583.1',
        '72.0.3583.0', '71.0.3578.11', '70.0.3538.69', '71.0.3578.10', '72.0.3582.0', '72.0.3581.4',
        '71.0.3578.9', '70.0.3538.67', '72.0.3581.3', '72.0.3581.2', '72.0.3581.1', '72.0.3581.0',
        '71.0.3578.8', '70.0.3538.66', '72.0.3580.1', '72.0.3580.0', '71.0.3578.7', '70.0.3538.65',
        '71.0.3578.6', '72.0.3579.1', '72.0.3579.0', '71.0.3578.5', '70.0.3538.64', '71.0.3578.4',
        '71.0.3578.3', '71.0.3578.2', '71.0.3578.1', '71.0.3578.0', '70.0.3538.63', '69.0.3497.128',
        '70.0.3538.62', '70.0.3538.61', '70.0.3538.60', '70.0.3538.59', '71.0.3577.1', '71.0.3577.0',
        '70.0.3538.58', '69.0.3497.127', '71.0.3576.2', '71.0.3576.1', '71.0.3576.0', '70.0.3538.57',
        '70.0.3538.56', '71.0.3575.2', '70.0.3538.55', '69.0.3497.126', '70.0.3538.54', '71.0.3575.1',
        '71.0.3575.0', '71.0.3574.1', '71.0.3574.0', '70.0.3538.53', '69.0.3497.125', '70.0.3538.52',
        '71.0.3573.1', '71.0.3573.0', '70.0.3538.51', '69.0.3497.124', '71.0.3572.1', '71.0.3572.0',
        '70.0.3538.50', '69.0.3497.123', '71.0.3571.2', '70.0.3538.49', '69.0.3497.122', '71.0.3571.1',
        '71.0.3571.0', '70.0.3538.48', '69.0.3497.121', '71.0.3570.1', '71.0.3570.0', '70.0.3538.47',
        '69.0.3497.120', '71.0.3568.2', '71.0.3569.1', '71.0.3569.0', '70.0.3538.46', '69.0.3497.119',
        '70.0.3538.45', '71.0.3568.1', '71.0.3568.0', '70.0.3538.44', '69.0.3497.118', '70.0.3538.43',
        '70.0.3538.42', '71.0.3567.1', '71.0.3567.0', '70.0.3538.41', '69.0.3497.117', '71.0.3566.1',
        '71.0.3566.0', '70.0.3538.40', '69.0.3497.116', '71.0.3565.1', '71.0.3565.0', '70.0.3538.39',
        '69.0.3497.115', '71.0.3564.1', '71.0.3564.0', '70.0.3538.38', '69.0.3497.114', '71.0.3563.0',
        '71.0.3562.2', '70.0.3538.37', '69.0.3497.113', '70.0.3538.36', '70.0.3538.35', '71.0.3562.1',
        '71.0.3562.0', '70.0.3538.34', '69.0.3497.112', '70.0.3538.33', '71.0.3561.1', '71.0.3561.0',
        '70.0.3538.32', '69.0.3497.111', '71.0.3559.6', '71.0.3560.1', '71.0.3560.0', '71.0.3559.5',
        '71.0.3559.4', '70.0.3538.31', '69.0.3497.110', '71.0.3559.3', '70.0.3538.30', '69.0.3497.109',
        '71.0.3559.2', '71.0.3559.1', '71.0.3559.0', '70.0.3538.29', '69.0.3497.108', '71.0.3558.2',
        '71.0.3558.1', '71.0.3558.0', '70.0.3538.28', '69.0.3497.107', '71.0.3557.2', '71.0.3557.1',
        '71.0.3557.0', '70.0.3538.27', '69.0.3497.106', '71.0.3554.4', '70.0.3538.26', '71.0.3556.1',
        '71.0.3556.0', '70.0.3538.25', '71.0.3554.3', '69.0.3497.105', '71.0.3554.2', '70.0.3538.24',
        '69.0.3497.104', '71.0.3555.2', '70.0.3538.23', '71.0.3555.1', '71.0.3555.0', '70.0.3538.22',
        '69.0.3497.103', '71.0.3554.1', '71.0.3554.0', '70.0.3538.21', '69.0.3497.102', '71.0.3553.3',
        '70.0.3538.20', '69.0.3497.101', '71.0.3553.2', '69.0.3497.100', '71.0.3553.1', '71.0.3553.0',
        '70.0.3538.19', '69.0.3497.99', '69.0.3497.98', '69.0.3497.97', '71.0.3552.6', '71.0.3552.5',
        '71.0.3552.4', '71.0.3552.3', '71.0.3552.2', '71.0.3552.1', '71.0.3552.0', '70.0.3538.18',
        '69.0.3497.96', '71.0.3551.3', '71.0.3551.2', '71.0.3551.1', '71.0.3551.0', '70.0.3538.17',
        '69.0.3497.95', '71.0.3550.3', '71.0.3550.2', '71.0.3550.1', '71.0.3550.0', '70.0.3538.16',
        '69.0.3497.94', '71.0.3549.1', '71.0.3549.0', '70.0.3538.15', '69.0.3497.93', '69.0.3497.92',
        '71.0.3548.1', '71.0.3548.0', '70.0.3538.14', '69.0.3497.91', '71.0.3547.1', '71.0.3547.0',
        '70.0.3538.13', '69.0.3497.90', '71.0.3546.2', '69.0.3497.89', '71.0.3546.1', '71.0.3546.0',
        '70.0.3538.12', '69.0.3497.88', '71.0.3545.4', '71.0.3545.3', '71.0.3545.2', '71.0.3545.1',
        '71.0.3545.0', '70.0.3538.11', '69.0.3497.87', '71.0.3544.5', '71.0.3544.4', '71.0.3544.3',
        '71.0.3544.2', '71.0.3544.1', '71.0.3544.0', '69.0.3497.86', '70.0.3538.10', '69.0.3497.85',
        '70.0.3538.9', '69.0.3497.84', '71.0.3543.4', '70.0.3538.8', '71.0.3543.3', '71.0.3543.2',
        '71.0.3543.1', '71.0.3543.0', '70.0.3538.7', '69.0.3497.83', '71.0.3542.2', '71.0.3542.1',
        '71.0.3542.0', '70.0.3538.6', '69.0.3497.82', '69.0.3497.81', '71.0.3541.1', '71.0.3541.0',
        '70.0.3538.5', '69.0.3497.80', '71.0.3540.1', '71.0.3540.0', '70.0.3538.4', '69.0.3497.79',
        '70.0.3538.3', '71.0.3539.1', '71.0.3539.0', '69.0.3497.78', '68.0.3440.134', '69.0.3497.77',
        '70.0.3538.2', '70.0.3538.1', '70.0.3538.0', '69.0.3497.76', '68.0.3440.133', '69.0.3497.75',
        '70.0.3537.2', '70.0.3537.1', '70.0.3537.0', '69.0.3497.74', '68.0.3440.132', '70.0.3536.0',
        '70.0.3535.5', '70.0.3535.4', '70.0.3535.3', '69.0.3497.73', '68.0.3440.131', '70.0.3532.8',
        '70.0.3532.7', '69.0.3497.72', '69.0.3497.71', '70.0.3535.2', '70.0.3535.1', '70.0.3535.0',
        '69.0.3497.70', '68.0.3440.130', '69.0.3497.69', '68.0.3440.129', '70.0.3534.4', '70.0.3534.3',
        '70.0.3534.2', '70.0.3534.1', '70.0.3534.0', '69.0.3497.68', '68.0.3440.128', '70.0.3533.2',
        '70.0.3533.1', '70.0.3533.0', '69.0.3497.67', '68.0.3440.127', '70.0.3532.6', '70.0.3532.5',
        '70.0.3532.4', '69.0.3497.66', '68.0.3440.126', '70.0.3532.3', '70.0.3532.2', '70.0.3532.1',
        '69.0.3497.60', '69.0.3497.65', '69.0.3497.64', '70.0.3532.0', '70.0.3531.0', '70.0.3530.4',
        '70.0.3530.3', '70.0.3530.2', '69.0.3497.58', '68.0.3440.125', '69.0.3497.57', '69.0.3497.56',
        '69.0.3497.55', '69.0.3497.54', '70.0.3530.1', '70.0.3530.0', '69.0.3497.53', '68.0.3440.124',
        '69.0.3497.52', '70.0.3529.3', '70.0.3529.2', '70.0.3529.1', '70.0.3529.0', '69.0.3497.51',
        '70.0.3528.4', '68.0.3440.123', '70.0.3528.3', '70.0.3528.2', '70.0.3528.1', '70.0.3528.0',
        '69.0.3497.50', '68.0.3440.122', '70.0.3527.1', '70.0.3527.0', '69.0.3497.49', '68.0.3440.121',
        '70.0.3526.1', '70.0.3526.0', '68.0.3440.120', '69.0.3497.48', '69.0.3497.47', '68.0.3440.119',
        '68.0.3440.118', '70.0.3525.5', '70.0.3525.4', '70.0.3525.3', '68.0.3440.117', '69.0.3497.46',
        '70.0.3525.2', '70.0.3525.1', '70.0.3525.0', '69.0.3497.45', '68.0.3440.116', '70.0.3524.4',
        '70.0.3524.3', '69.0.3497.44', '70.0.3524.2', '70.0.3524.1', '70.0.3524.0', '70.0.3523.2',
        '69.0.3497.43', '68.0.3440.115', '70.0.3505.9', '69.0.3497.42', '70.0.3505.8', '70.0.3523.1',
        '70.0.3523.0', '69.0.3497.41', '68.0.3440.114', '70.0.3505.7', '69.0.3497.40', '70.0.3522.1',
        '70.0.3522.0', '70.0.3521.2', '69.0.3497.39', '68.0.3440.113', '70.0.3505.6', '70.0.3521.1',
        '70.0.3521.0', '69.0.3497.38', '68.0.3440.112', '70.0.3520.1', '70.0.3520.0', '69.0.3497.37',
        '68.0.3440.111', '70.0.3519.3', '70.0.3519.2', '70.0.3519.1', '70.0.3519.0', '69.0.3497.36',
        '68.0.3440.110', '70.0.3518.1', '70.0.3518.0', '69.0.3497.35', '69.0.3497.34', '68.0.3440.109',
        '70.0.3517.1', '70.0.3517.0', '69.0.3497.33', '68.0.3440.108', '69.0.3497.32', '70.0.3516.3',
        '70.0.3516.2', '70.0.3516.1', '70.0.3516.0', '69.0.3497.31', '68.0.3440.107', '70.0.3515.4',
        '68.0.3440.106', '70.0.3515.3', '70.0.3515.2', '70.0.3515.1', '70.0.3515.0', '69.0.3497.30',
        '68.0.3440.105', '68.0.3440.104', '70.0.3514.2', '70.0.3514.1', '70.0.3514.0', '69.0.3497.29',
        '68.0.3440.103', '70.0.3513.1', '70.0.3513.0', '69.0.3497.28',
    )
    return template % random.choice(chrome_versions)
1669
1670
# Default headers attached to every outgoing HTTP request; the User-Agent
# is randomized once per process via random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Alternative User-Agent strings some extractors switch to when a site
# serves different content per browser.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Sentinel meaning "no default supplied" so that None stays a valid
# caller-provided default value.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, used when parsing localized dates.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Media file extensions recognized when guessing formats from URLs/paths.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII transliteration (single
# letters map 1:1; Æ/ß and friends expand to multi-letter strings).
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

# strptime() patterns tried in order by the date parsing helpers.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Variants for ambiguous numeric dates: day-first (e.g. European style)...
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# ...and month-first (US style).
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of P.A.C.K.E.R.-obfuscated JavaScript.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts the JSON-LD payload from a <script type="application/ld+json"> tag.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1778
1779
def preferredencoding():
    """Return the system's preferred text encoding.

    Falls back to UTF-8 whenever the locale module reports an encoding
    that cannot actually be used to encode text.
    """
    encoding = 'UTF-8'
    try:
        candidate = locale.getpreferredencoding()
        # Probe the codec: a broken or unknown locale raises here.
        'TEST'.encode(candidate)
    except Exception:
        pass
    else:
        encoding = candidate
    return encoding
1793
1794
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        # Create the temp file in the target's directory so the final
        # os.rename stays on one filesystem (cross-device renames fail
        # and would not be atomic anyway).
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile is created with mode 0600; widen it to what
            # a regular file would get under the current umask.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # On any failure, clean up the temp file before re-raising.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1853
1854
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # Only plain attribute names are accepted; anything else could
        # corrupt the XPath expression built below.
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # Python 2.6's ElementTree does not support attribute predicates,
        # so filter the matched elements manually.
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
1872
1873
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an XPath into '{uri}tag' form using ns_map."""
    def expand(step):
        parts = step.split(':')
        # A step without a namespace prefix is kept verbatim.
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
1884
1885
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` under `node`.

    `xpath` may be a single expression or an iterable of candidate
    expressions tried in order.  When nothing matches: return `default`
    if one was supplied, raise ExtractorError if `fatal` is set,
    otherwise return None.
    """
    def _find_xpath(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Fix: an empty candidate iterable used to leave `n` unbound and
        # crash with UnboundLocalError; treat it as "not found" instead.
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1907
1908
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    elem = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Propagate a miss (None) or the substituted default unchanged.
    if elem is None or elem == default:
        return elem
    if elem.text is not None:
        return elem.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1922
1923
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the element matched by `xpath`, with the
    same default/fatal semantics as xpath_element()."""
    elem = find_xpath_attr(node, xpath, key)
    if elem is not None:
        return elem.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML attribute %s'
            % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1935
1936
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    matches = get_elements_by_attribute('id', id, html)
    return matches[0] if matches else None
1940
1941
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the given CSS class, or None."""
    for content in get_elements_by_class(class_name, html):
        return content
    return None
1946
1947
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose `attribute` matches `value`, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1951
1952
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match the class name as a whole word anywhere within the attribute value.
    class_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_re, html, escape_value=False)
1958
1959
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the contents of all tags whose `attribute` matches `value`.

    `value` is treated as a regular expression when escape_value is False.
    """
    if escape_value:
        value = re.escape(value)

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    results = []
    for match in re.finditer(pattern, html):
        content = match.group('content')
        # Drop a stray wrapping quote pair left over from sloppy markup.
        if content[:1] in ('"', "'"):
            content = content[1:-1]
        results.append(unescapeHTML(content))
    return results
1983
1984
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag; value-less
        # attributes are represented with None values.
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)
1994
1995
def extract_attributes(html_element):
    """Parse a single HTML start tag and return its attributes as a dict.

    Attribute names are lowercased, entity references are decoded and
    value-less attributes map to None, so
    ``<el a="foo" B="bar" empty= noval entity="&amp;">`` becomes
    ``{'a': 'foo', 'b': 'bar', 'empty': '', 'noval': None, 'entity': '&'}``.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Pythons raise on malformed markup; keep whatever was
        # collected before the parser gave up.
        pass
    return attr_parser.attrs
2020
2021
def clean_html(html):
    """Strip markup from an HTML snippet, yielding readable text."""
    # None passes straight through so callers can feed optional fields.
    if html is None:
        return None

    # Collapse literal newlines, then re-introduce them for <br> tags and
    # paragraph boundaries.
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Remove any remaining tags, then decode entities.
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
2037
2038
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout into binary mode so media data is not mangled
                # by CRLF translation.
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors cannot be fixed by renaming; re-raise as-is.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2069
2070
def timeconvert(timestr):
    """Convert an RFC 2822 date string into a POSIX timestamp (or None)."""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Unparseable input is reported as None rather than raising.
        return None
    return email.utils.mktime_tz(parsed)
2078
2079
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            # Transliterate accented characters to plain ASCII.
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            # Drop control characters and '?' entirely.
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            # Path separators and Windows-forbidden characters.
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            # Restricted mode keeps filenames ASCII-only.
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        # Collapse runs of underscores introduced by the substitutions above.
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        # Leading dots would hide the file on POSIX systems.
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2119
2120
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    # Other platforms accept (almost) any character in path components.
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters Windows forbids in path components, as well as a
    # trailing dot/space (which Windows silently strips); keep the special
    # '.'/'..' components untouched.
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
2137
2138
def sanitize_url(url):
    """Normalize a URL: add a default scheme to protocol-relative URLs and
    repair a few scheme typos observed in the wild."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:' + url
    # Known scheme misspellings and their corrections.
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for broken, fixed in typo_fixes:
        if re.match(broken, url):
            return re.sub(broken, fixed, url)
    return url
2155
2156
def sanitized_Request(url, *args, **kwargs):
    # Like compat_urllib_request.Request, but with the URL run through
    # sanitize_url() first (scheme fix-ups, protocol-relative URLs).
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2159
2160
def expand_path(s):
    """Expand shell variables and ~"""
    # ~ is expanded first (via the compat helper), then $VARs.
    return os.path.expandvars(compat_expanduser(s))
2164
2165
def orderedSet(iterable):
    """Return a list of the iterable's elements with duplicates removed,
    preserving first-seen order."""
    # A linear membership scan keeps this valid even for unhashable
    # elements, at the cost of O(n^2) worst case.
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
2173
2174
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Named HTML 4 entity?
    codepoint = compat_html_entities.name2codepoint.get(entity)
    if codepoint is not None:
        return compat_chr(codepoint)

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric reference, decimal (&#119;) or hexadecimal (&#x77;).
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2204
2205
def unescapeHTML(s):
    """Decode HTML entity references in *s*; None passes through unchanged."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _replace, s)
2213
2214
def get_subprocess_encoding():
    """Return the encoding used to exchange data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # Elsewhere the filesystem encoding applies; fall back to UTF-8 when
    # Python cannot determine one.
    encoding = sys.getfilesystemencoding()
    return encoding if encoding is not None else 'utf-8'
2225
2226
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """
    # Returns `s` unchanged wherever the platform accepts Unicode
    # filenames; otherwise encodes it with the subprocess encoding.

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2249
2250
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename to text on Python 2; pass anything else through."""
    # Python 3 filenames are already text, and non-bytes inputs need no work.
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2260
2261
def encodeArgument(s):
    # Encode a subprocess argument using the subprocess encoding.
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2269
2270
def decodeArgument(b):
    # Inverse of encodeArgument(): decode with the subprocess encoding.
    return decodeFilename(b, True)
2273
2274
def decodeOption(optval):
    """Decode a command-line option value to text, leaving None untouched."""
    if optval is None:
        return None
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval
    assert isinstance(decoded, compat_str)
    return decoded
2283
2284
def formatSeconds(secs):
    """Format a duration in seconds as '[H:MM:SS]', 'M:SS' or plain seconds.

    Durations of an hour or more render as H:MM:SS, a minute or more as
    M:SS, anything shorter as a bare number of seconds.
    """
    # Use >= at the unit boundaries so that exactly 3600 s is shown as
    # '1:00:00' (not '60:00') and exactly 60 s as '1:00' (not '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2292
2293
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler with the best SSL context the running
    Python version supports, honouring the 'nocheckcertificate' option."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification.
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2317
2318
def bug_reports_message():
    """Return the boilerplate appended to unexpected-error messages."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dlc -U to update'
    else:
        update_cmd = 'see https://github.com/blackjack4494/yt-dlc on how to update'
    return (
        '; please report this issue on https://github.com/blackjack4494/yt-dlc .'
        + ' Make sure you are using the latest version; %s.' % update_cmd
        + ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.')
2328
2329
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors; all custom errors derive from it."""
    pass
2333
2334
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
        """

        # Network failures are expected conditions, not program bugs.
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the bug-report boilerplate appended.
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        # Render the stored traceback as a printable string, if any.
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2362
2363
class UnsupportedError(ExtractorError):
    """Raised when no extractor handles the given URL."""
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url
2369
2370
class RegexNotFoundError(ExtractorError):
    """Raised when a mandatory regex did not match the page content."""
    pass
2374
2375
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # `countries` — presumably the country codes where the video IS
        # available; NOTE(review): confirm format against callers.
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        self.countries = countries
2387
2388
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # A (type, value, traceback) triple, or None.
        self.exc_info = exc_info
2401
2402
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2410
2411
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the raw message for callers that format their own output.
        self.msg = msg
2422
2423
class MaxDownloadsReached(YoutubeDLError):
    """Raised to stop processing once the --max-downloads limit is reached."""
    pass
2427
2428
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2436
2437
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2453
2454
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes (xattrs) fails.

    `code` is the OS errno (if any), `msg` the error text; `reason`
    classifies the failure as 'NO_SPACE', 'VALUE_TOO_LONG' or
    'NOT_SUPPORTED' so callers can react without re-parsing the message.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        # Fix: the OS reports 'Disk quota exceeded' (EDQUOT); the previous
        # misspelling 'excedded' could never match that message.
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2469
2470
class XAttrUnavailableError(YoutubeDLError):
    """Raised when no usable xattr implementation is available."""
    pass
2473
2474
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate `http_class`, applying the configured 'source_address'
    (binding outgoing connections to a specific local IP) when set."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family matching the source address (a '.'
            # in it indicates IPv4) and keep only compatible candidates.
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    # Bind to the requested local address before connecting.
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # Re-raise the last connect error, or report an empty result.
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2538
2539
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' control header.

    When present, it is removed together with any Accept-Encoding header
    (matched case-insensitively); the original mapping is returned
    unmodified otherwise.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    sanitized = {k: v for k, v in headers.items() if k.lower() != 'accept-encoding'}
    del sanitized['Youtubedl-no-compression']
    return sanitized
2548
2549
2550 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2551 """Handler for HTTP requests and responses.
2552
2553 This class, when installed with an OpenerDirector, automatically adds
2554 the standard headers to every HTTP request and handles gzipped and
2555 deflated responses from web servers. If compression is to be avoided in
2556 a particular request, the original request in the program code only has
2557 to include the HTTP header "Youtubedl-no-compression", which will be
2558 removed before making the real request.
2559
2560 Part of this code was copied from:
2561
2562 http://techknack.net/python-urllib2-handlers/
2563
2564 Andrew Rowls, the author of that code, agreed to release it to the
2565 public domain.
2566 """
2567
    def __init__(self, params, *args, **kwargs):
        # `params` is the YoutubeDL options dict (e.g. 'source_address').
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2571
    def http_open(self, req):
        """Open a plain HTTP connection, honouring a per-request SOCKS proxy."""
        conn_class = compat_http_client.HTTPConnection

        # The SOCKS proxy is smuggled in through a private request header;
        # strip it before the request goes on the wire.
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)
2583
2584 @staticmethod
2585 def deflate(data):
2586 try:
2587 return zlib.decompress(data, -zlib.MAX_WBITS)
2588 except zlib.error:
2589 return zlib.decompress(data)
2590
    def http_request(self, req):
        """Prepare an outgoing request: percent-escape the URL and add the
        standard headers without clobbering caller-supplied ones."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Remove internal Youtubedl-* control headers before sending.
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req
2621
    def http_response(self, req, resp):
        """Postprocess a response: transparently decompress gzip/deflate
        bodies and percent-encode non-ASCII redirect Location headers."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets exactly the same treatment as HTTP.
    https_request = http_request
    https_response = http_response
2671
2672
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* whose connect() tunnels
    through the SOCKS proxy described by the *socks_proxy* URL
    (socks/socks4/socks4a/socks5 schemes).

    NOTE(review): an unrecognized scheme leaves socks_type unset, which
    raises UnboundLocalError when proxy_args is built below — callers are
    expected to pass already-validated proxy URLs.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        # Credentials embedded in the proxy URL are percent-encoded;
        # None/empty values pass through untouched.
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            # Establish the raw TCP connection via the SOCKS proxy first.
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS connections, wrap the proxied socket in TLS afterwards.
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2714
2715
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler honoring youtube-dlc params and optional SOCKS proxying.

    *https_conn_class* lets the caller substitute a custom HTTPSConnection
    subclass (e.g. one with relaxed certificate verification).
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        # Forward the SSL context / hostname checking configured on the base
        # HTTPSHandler, when the running Python version provides them.
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            # Route through SOCKS and strip the internal pseudo-header.
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
2739
2740
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar with UTF-8 support and session-cookie fixes.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly cookies in Netscape-format cookie files.
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookies.txt entry.
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dlc. Do not edit.

'''
    # Field layout of a single cookies.txt line, used for validation on load.
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file, skipping (with a warning) invalid lines."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly prefix so the base parser accepts the entry.
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2857
2858
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that mirrors the HTTP hooks onto HTTPS as well."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is deliberately left disabled.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2881
2882
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler working around Python 2 byte-string redirect URLs."""
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2890
2891
def extract_timezone(date_str):
    """Split a date string into (utc_offset, remainder).

    Recognizes a trailing 'Z' or '+HH:MM' / '-HHMM' style suffix. Returns a
    datetime.timedelta (zero for 'Z' or when no suffix is found) together
    with the date string stripped of any recognized suffix.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]
    if not m or not m.group('sign'):
        return datetime.timedelta(), date_str
    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
2908
2909
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Fractional seconds are not part of the parsed format; drop them.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        parsed = datetime.datetime.strptime(date_str, fmt) - timezone
        return calendar.timegm(parsed.timetuple())
    except ValueError:
        return None
2927
2928
def date_formats(day_first=True):
    """Return the strptime format list matching the expected day/month order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2931
2932
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Commas carry no information here; treat them as spaces.
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # NOTE: every candidate format is tried and the LAST successful parse
    # wins — this mirrors the historical behavior of this helper.
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to RFC 2822 style dates.
        parsed = email.utils.parsedate_tz(date_str)
        if parsed:
            try:
                upload_date = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
2959
2960
def unified_timestamp(date_str, day_first=True):
    """Convert a free-form date/time string to a POSIX timestamp, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # NOTE: the 12-hour correction must be determined BEFORE the AM/PM
    # marker is stripped below.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for fmt in date_formats(day_first):
        try:
            parsed = datetime.datetime.strptime(date_str, fmt) - timezone + datetime.timedelta(hours=pm_delta)
        except ValueError:
            continue
        return calendar.timegm(parsed.timetuple())
    # Fall back to RFC 2822 style dates.
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
2992
2993
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3005
3006
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle filename: <base>.<language>.<format>."""
    new_ext = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, new_ext, expected_real_ext)
3009
3010
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # Months and years are approximated as 30 and 365 days respectively.
    if unit == 'month':
        unit, amount = 'day', amount * 30
    elif unit == 'year':
        unit, amount = 'day', amount * 365
    return today + datetime.timedelta(**{unit + 's': amount})
3038
3039
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return '-'.join(match.groups()) if match else date_str
3048
3049
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3079
3080
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Python 2 may hand back a byte string in some locales.
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3089
3090
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C-runtime file descriptors (stdout=1, stderr=2) to the Win32
    # standard-handle constants (STD_OUTPUT_HANDLE=-11, STD_ERROR_HANDLE=-12).
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle counts as a console only if it is a local character device
        # and GetConsoleMode succeeds on it.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane,
        # or len(s) when there is none.
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks: BMP runs are limited to 1024 chars; a non-BMP
    # character is written alone as its two UTF-16 code units.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3164
3165
def write_string(s, out=None, encoding=None):
    """Write text *s* to *out* (default sys.stderr), coping with Windows
    consoles, binary streams and encoding fallbacks."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer WriteConsoleW so non-ANSI characters survive on Windows consoles.
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode explicitly so
        # we control the encoding and error handling.
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3186
3187
def bytes_to_intlist(bs):
    """Return the byte values of *bs* as a list of ints (Python 2/3 safe)."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    # Python 2: indexing a str yields 1-char strings
    return [ord(ch) for ch in bs]
3195
3196
def intlist_to_bytes(xs):
    """Pack a list of ints (0-255) back into a bytes object."""
    if not xs:
        return b''
    fmt = '%dB' % len(xs)
    return compat_struct_pack(fmt, *xs)
3201
3202
# Cross-platform file locking
if sys.platform == 'win32':
    # Windows: use LockFileEx/UnlockFileEx from kernel32 via ctypes.
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Win32 OVERLAPPED structure; Offset/OffsetHigh position the lock.
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering (practically) the whole file, starting at offset 0.
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Acquire an exclusive (write) or shared (read) lock on file *f*.
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Release the lock previously taken by _lock_file.
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            # Shared lock for readers, exclusive lock for writers.
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3276
3277
class locked_file(object):
    """File wrapper taking an OS-level lock for the lifetime of a `with` block.

    Read mode acquires a shared lock; write/append modes acquire an
    exclusive one. The underlying file is always closed on exit.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3307
3308
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), defaulting to utf-8 when unset."""
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
3312
3313
def shell_quote(args):
    """Return *args* joined into a single shell-escaped command line."""
    encoding = get_filesystem_encoding()

    def as_text(a):
        # We may get a filename encoded with 'encodeFilename'
        return a.decode(encoding) if isinstance(a, bytes) else a

    return ' '.join(compat_shlex_quote(as_text(a)) for a in args)
3323
3324
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL.
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    payload = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, payload)
3333
3334
def unsmuggle_url(smug_url, default=None):
    """Reverse of smuggle_url(); returns (url, data) or (url, *default*)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3342
3343
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.00MiB'.

    Accepts ints, floats, numeric strings, or None (rendered as 'N/A').
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp so absurdly large values do not overrun the suffix list
        # (previously this raised IndexError for >= 1024 YiB).
        exponent = min(int(math.log(bytes, 1024.0)), len(suffixes) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3356
3357
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' from *s* using *unit_table*; int or None."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # Normalize a comma decimal separator before converting.
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3367
3368
def parse_filesize(s):
    """Parse a human-readable file size like '5 MiB' or '1,5GB' into bytes."""
    if s is None:
        return None

    # Both SI (1000-based) and IEC (1024-based) units are accepted. The
    # lower-case forms are of course incorrect and unofficial, but we
    # support those too: historically 'mB'-style spellings are treated as
    # binary, 'Mb'/'mb'-style ones as decimal.
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    _SI_NAMES = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zetta', 'yotta')
    for exponent, si_name in enumerate(_SI_NAMES, start=1):
        prefix = si_name[0].upper()  # K, M, G, T, P, E, Z, Y
        decimal = 1000 ** exponent
        binary = 1024 ** exponent
        _UNIT_TABLE[prefix + 'iB'] = binary             # e.g. 'MiB'
        _UNIT_TABLE[prefix + 'B'] = decimal             # e.g. 'MB'
        _UNIT_TABLE[prefix.lower() + 'B'] = binary      # e.g. 'mB'
        _UNIT_TABLE[prefix + 'b'] = decimal             # e.g. 'Mb'
        _UNIT_TABLE[prefix.lower() + 'b'] = decimal     # e.g. 'mb'
        _UNIT_TABLE[si_name + 'bytes'] = decimal        # e.g. 'megabytes'
        _UNIT_TABLE[si_name[:2] + 'bibytes'] = binary   # e.g. 'mebibytes'

    return lookup_unit_table(_UNIT_TABLE, s)
3438
3439
def parse_count(s):
    """Parse a view/like counter such as '1.2M' or '12,345' into an int."""
    if s is None:
        return None

    s = s.strip()

    # Plain numbers (possibly with separators) need no unit table.
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {}
    for suffix in ('k', 'K'):
        multipliers[suffix] = 1000
    for suffix in ('m', 'M', 'kk', 'KK'):
        multipliers[suffix] = 1000 ** 2

    return lookup_unit_table(multipliers, s)
3459
3460
def parse_resolution(s):
    """Extract width/height from strings like '1920x1080', '720p' or '4K'."""
    if s is None:
        return {}

    m = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if m:
        return {
            'width': int(m.group('w')),
            'height': int(m.group('h')),
        }

    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        # 4K -> 2160 lines, 8K -> 4320 lines
        return {'height': int(m.group(1)) * 540}

    return {}
3481
3482
def parse_bitrate(s):
    """Extract an integer bitrate in kbps from a string, or None."""
    if not isinstance(s, compat_str):
        return None
    m = re.search(r'\b(\d+)\s*kbps', s)
    return int(m.group(1)) if m else None
3489
3490
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3500
3501
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """

    abbreviations = [full_name[:3] for full_name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3510
3511
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Leave already-escaped entities and numeric character references alone.
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
3518
3519
def setproctitle(title):
    """Set the process title shown by ps/top, via glibc prctl(PR_SET_NAME).

    Silently does nothing on platforms where libc.so.6 or prctl is
    unavailable (Windows, Jython, non-glibc systems).
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3544
3545
def remove_start(s, start):
    """Strip *start* from the beginning of *s* if present (None-safe)."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3548
3549
def remove_end(s, end):
    """Strip *end* from the end of *s* if present (None-safe)."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3552
3553
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3561
3562
def get_domain(url):
    """Extract the domain (scheme and leading 'www.' stripped) from a URL."""
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if m is None:
        return None
    return m.group('domain')
3566
3567
def url_basename(url):
    """Return the last path component of *url* (query/fragment ignored)."""
    path = compat_urlparse.urlparse(url).path
    segments = path.strip('/').split('/')
    return segments[-1]
3571
3572
def base_url(url):
    """Return *url* truncated after the last '/' before any query/fragment."""
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
3575
3576
def urljoin(base, path):
    """Join *base* and *path* defensively.

    Byte strings are decoded as UTF-8; an already-absolute (or
    protocol-relative) *path* is returned as-is; None is returned when
    either part is missing or unusable.
    """
    def as_text(s):
        return s.decode('utf-8') if isinstance(s, bytes) else s

    path = as_text(path)
    if not isinstance(path, compat_str) or not path:
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        # Already an absolute or protocol-relative URL.
        return path
    base = as_text(base)
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3590
3591
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that always issues an HTTP HEAD."""

    def get_method(self):
        return 'HEAD'
3595
3596
class PUTRequest(compat_urllib_request.Request):
    """Request subclass that always issues an HTTP PUT."""

    def get_method(self):
        return 'PUT'
3600
3601
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to an int (optionally via attribute lookup and scaling).

    Returns *default* for None, '' or unconvertible values; otherwise
    int(v) * invscale // scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3614
3615
def str_or_none(v, default=None):
    """Coerce *v* to compat_str, keeping *default* for None."""
    if v is None:
        return default
    return compat_str(v)
3618
3619
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Strip separators and a leading '+' before conversion.
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3627
3628
def float_or_none(v, scale=1, invscale=1, default=None):
    """Best-effort float conversion with scaling; `default` on None/failure."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
3636
3637
def bool_or_none(v, default=None):
    """Pass through real booleans; everything else yields `default`."""
    return default if not isinstance(v, bool) else v
3640
3641
def strip_or_none(v, default=None):
    """Strip whitespace from a string; non-strings give `default`."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3644
3645
def url_or_none(url):
    """Return the stripped URL when it looks like an http(s) or
    protocol-relative URL, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url):
        return url
    return None
3651
3652
def parse_duration(s):
    """Parse a duration string into seconds (int/float), or None.

    Accepts clock notation ([[[DD:]HH:]MM:]SS[.ms]), ISO-8601-like forms
    ('PT1H2M3S', '1d 2h 3m 4s') and loose text ('2 hours', '90 min').
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # 1st attempt: clock-style [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # 2nd attempt: ISO-8601-like or verbose unit-suffixed form;
        # years/months/weeks are matched but deliberately ignored
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # 3rd attempt: fractional '2.5 hours' / '90 minutes' style
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum whichever components were captured
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3709
3710
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` before the real extension, e.g. 'a.mp4' -> 'a.temp.mp4'.

    If `expected_real_ext` is given and does not match the actual extension,
    `ext` is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3717
3718
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the file extension with `ext`; when `expected_real_ext` is
    given and does not match, append instead of replacing."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        base = filename
    else:
        base = name
    return '{0}.{1}'.format(base, ext)
3724
3725
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
    except OSError:
        return False
    return exe
3734
3735
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dlc is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = proc.communicate()[0]
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3753
3754
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version token from `output` with `version_re`
    (default 'version <token>'); return `unrecognized` when absent."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    return mobj.group(1) if mobj else unrecognized
3764
3765
class PagedList(object):
    # Base class for lazily-paged result lists; subclasses implement getslice()
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3770
3771
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via pagefunc(pagenum),
    optionally caching every fetched page."""
    def __init__(self, pagefunc, pagesize, use_cache=True):
        # pagefunc: callable returning an iterable of results for a page index
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Collect results [start:end) by querying only the pages needed."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of `start` within this page (0 for all later pages)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past `end` within this page (None = to page end)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
3822
3823
class InAdvancePagedList(PagedList):
    """PagedList for sources where the total page count is known up front."""
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Collect results [start:end) across the known range of pages."""
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Leading items of the first page that precede `start`
        skip_elems = start - start_page * self._pagesize
        # Total number of items still wanted (None = unbounded)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # Enough results collected; trim the page and stop
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3851
3852
def uppercase_escape(s):
    """Decode literal \\UXXXXXXXX escape sequences embedded in `s`."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda mobj: decode(mobj.group(0))[0],
        s)
3859
3860
def lowercase_escape(s):
    """Decode literal \\uXXXX escape sequences embedded in `s`."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda mobj: decode(mobj.group(0))[0],
        s)
3867
3868
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2's quote() cannot handle unicode input; pre-encode to UTF-8
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # The safe-list keeps RFC 3986 reserved and sub-delimiter characters intact
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3874
3875
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    # IDNA-encode the host; percent-escape every other component
    return parsed._replace(
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment)
    ).geturl()
3886
3887
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, skipping blanks and comments.

    Lines starting with '#', ';' or ']' are treated as comments. A UTF-8
    BOM is stripped whether it appears as the raw byte triplet decoded
    byte-wise (Python 2 / binary mode) or as the single U+FEFF character
    that Python 3 text-mode open() yields — the latter case was previously
    missed, corrupting the first URL of BOM-prefixed files.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Both byte-wise ('\xef\xbb\xbf') and text-mode ('\ufeff') BOM forms
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
3902
3903
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3906
3907
def update_url_query(url, query):
    """Merge the dict `query` into the query string of `url`."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    merged_qs = compat_parse_qs(parsed_url.query)
    merged_qs.update(query)
    new_query = compat_urllib_parse_urlencode(merged_qs, True)
    return compat_urlparse.urlunparse(parsed_url._replace(query=new_query))
3916
3917
def update_Request(req, url=None, data=None, headers=None, query=None):
    """Return a copy of `req` with URL, data, headers and/or query updated.

    `headers` are merged into (not replacing) the existing headers; the HTTP
    method (HEAD/PUT) and any timeout attribute of the original request are
    preserved.

    NOTE: the defaults were changed from mutable `{}` literals to None
    sentinels (shared-mutable-default anti-pattern); behavior is unchanged.
    """
    req_headers = req.headers.copy()
    req_headers.update(headers or {})
    req_data = data or req.data
    # update_url_query treats a falsy query as "leave the URL alone"
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    # Transfer the timeout if one was set on the original request
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3936
3937
def _multipart_encode_impl(data, boundary):
    """Serialize dict `data` into a multipart/form-data body using `boundary`.

    Raises ValueError when the boundary occurs inside any part.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    boundary_bytes = boundary.encode('ascii')
    out = b''
    for name, value in data.items():
        out += b'--' + boundary_bytes + b'\r\n'
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += part

    out += b'--' + boundary_bytes + b'--\r\n'

    return out, content_type
3958
3959
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    fixed_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # Boundary collided with the payload; only retry when we chose it
            if fixed_boundary:
                raise
            boundary = None
3988
3989
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable value among `key_or_keys` in dict `d`.

    None values are always skipped; falsy ones too unless
    `skip_false_values` is False.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
3998
3999
def try_get(src, getter, expected_type=None):
    """Apply each getter to `src` and return the first result that neither
    raises a lookup-style error nor fails the `expected_type` check."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fn in getters:
        try:
            value = fn(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4011
4012
def merge_dicts(*dicts):
    """Merge dicts left-to-right, ignoring None values; a later non-empty
    string may replace an earlier empty string under the same key."""
    merged = {}
    for current in dicts:
        for key, value in current.items():
            if value is None:
                continue
            if key not in merged:
                merged[key] = value
                continue
            if (isinstance(value, compat_str) and value
                    and isinstance(merged[key], compat_str)
                    and not merged[key]):
                merged[key] = value
    return merged
4025
4026
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Coerce `string` to compat_str, decoding byte strings with `encoding`."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4029
4030
# MPAA film ratings mapped to the minimum recommended viewer age
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4038
4039
# US TV Parental Guidelines ratings mapped to minimum viewer ages
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4048
4049
def parse_age_limit(s):
    """Normalize an age limit given as an int (0-21), a string like '18+',
    an MPAA rating ('PG-13') or a TV rating ('TV-MA'); None when unknown."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    mobj = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
4064
4065
def strip_jsonp(code):
    """Strip a JSONP wrapper, i.e. 'callback({...});', returning the payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4074
4075
def js_to_json(code):
    """Convert a JavaScript object/value literal into valid JSON text."""
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals (optionally used as object keys)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched token (keyword, comment, string, key or number)
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to their JSON equivalents
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            # Convert hex/octal integers to decimal (quoted when used as key)
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        # Bare identifiers become quoted JSON strings
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4116
4117
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def lookup(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return lookup
4126
4127
# Default output filename template: "<title>-<id>.<ext>"
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4129
4130
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ellipsis_str = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ellipsis_str)] + ellipsis_str
4139
4140
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4143
4144
def is_outdated_version(version, limit, assume_new=True):
    """Compare version strings; unknown/unparsable versions are treated as
    new or old according to `assume_new`."""
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
    return outdated
4152
4153
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U """
    from zipimport import zipimporter

    # Updatable when running from a zip bundle or a frozen executable
    if hasattr(sys, 'frozen'):
        return True
    return isinstance(globals().get('__loader__'), zipimporter)
4159
4160
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
4164
4165
def error_to_compat_str(err):
    """Stringify an exception, decoding Python 2 byte messages correctly."""
    message = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        message = message.decode(preferredencoding())
    return message
4173
4174
def mimetype2ext(mt):
    """Map a MIME type to a file extension, falling back to the subtype."""
    if mt is None:
        return None

    # Exact full-type matches take precedence
    FULL_TYPE_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    mapped = FULL_TYPE_MAP.get(mt)
    if mapped is not None:
        return mapped

    # Otherwise match on the normalized subtype (parameters stripped)
    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return SUBTYPE_MAP.get(subtype, subtype)
4212
4213
def parse_codecs(codecs_str):
    # http://tools.ietf.org/html/rfc6381
    """Split an RFC 6381 codecs string into 'vcodec'/'acodec' fields."""
    if not codecs_str:
        return {}
    split_codecs = [c for c in (
        part.strip() for part in codecs_str.strip().strip(',').split(',')) if c]
    vcodec = acodec = None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            vcodec = vcodec or full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    # Neither recognized: assume 'video,audio' when exactly two are given
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4243
4244
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from response headers: the Content-Disposition
    filename first, then the Content-Type MIME mapping."""
    headers = url_handle.headers

    cd = headers.get('Content-Disposition')
    if cd:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4257
4258
def encode_data_uri(data, mime_type):
    """Build a base64 'data:' URI for `data` with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4261
4262
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No viewer limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4271
4272
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Byte-order marks and the encodings they imply
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4291
4292
def determine_protocol(info_dict):
    """Infer the download protocol: explicit 'protocol' key first, then URL
    scheme prefixes (rtmp/mms/rtsp), then extension, then the parsed scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4313
4314
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    widths = [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]
    template = ' '.join('%-' + compat_str(width + 1) + 's' for width in widths[:-1]) + '%s'
    return '\n'.join(template % tuple(row) for row in rows)
4321
4322
def _match_one(filter_part, dct):
    """Evaluate a single --match-filter clause against dict `dct`.

    Supports binary comparisons (key OP value, with an optional '?' making a
    missing key match) and unary presence tests ('key' / '!key').
    Raises ValueError for malformed clauses.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes of the surrounding kind inside the value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Allow human-readable sizes such as 500k or 2.5MiB
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: match only when the '?' suffix was given
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4391
4392
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    parts = filter_str.split('&')
    return all(_match_one(part, dct) for part in parts)
4398
4399
def match_filter_func(filter_str):
    """Build a match-filter callback: returns None when the video passes,
    otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4408
4409
def parse_dfxp_time_expr(time_expr):
    """Parse a TTML/DFXP time expression into seconds; None when unparsable."""
    if not time_expr:
        return

    # Plain offset in seconds, optionally suffixed with 's'
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock notation HH:MM:SS[.f] (some files use ':' before the fraction)
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if not mobj:
        return
    secs = float(mobj.group(3).replace(':', '.'))
    return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + secs
4421
4422
def srt_subtitles_timecode(seconds):
    """Format a seconds offset as an SRT timecode 'HH:MM:SS,mmm'."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4425
4426
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Older TTML namespace URIs are rewritten to their modern equivalents
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # The only tts:* style attributes translated into SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> resolved style dict; default_style applies to all paragraphs
    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        """SAX-style target that renders one <p> subtree into SRT markup."""
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the parent
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the node and feed it through the SRT-rendering parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; repeat while forward references remain
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style attached to body/div becomes the default for all paragraphs
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4589
4590
def cli_option(params, command_option, param):
    """Render a string option as ['--opt', value]; [] when unset (None)."""
    value = params.get(param)
    if value:
        value = compat_str(value)
    return [command_option, value] if value is not None else []
4596
4597
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean option as CLI args, optionally joined 'opt=value'."""
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4606
4607
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit a bare flag when the param equals `expected_value`."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4611
4612
def cli_configuration_args(params, param, default=[]):
    """Return the configured list of extra CLI args, or `default` when unset."""
    extra_args = params.get(param)
    if extra_args is None:
        return default
    assert isinstance(extra_args, list)
    return extra_args
4619
4620
class ISO639Utils(object):
    """Conversions between ISO 639-1 (2-letter) and ISO 639-2/T (3-letter)
    language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        matches = (short for short, long_code in cls._lang_map.items()
                   if long_code == code)
        return next(matches, None)
4824
4825
4826 class ISO3166Utils(object):
4827 # From http://data.okfn.org/data/core/country-list
4828 _country_map = {
4829 'AF': 'Afghanistan',
4830 'AX': 'Åland Islands',
4831 'AL': 'Albania',
4832 'DZ': 'Algeria',
4833 'AS': 'American Samoa',
4834 'AD': 'Andorra',
4835 'AO': 'Angola',
4836 'AI': 'Anguilla',
4837 'AQ': 'Antarctica',
4838 'AG': 'Antigua and Barbuda',
4839 'AR': 'Argentina',
4840 'AM': 'Armenia',
4841 'AW': 'Aruba',
4842 'AU': 'Australia',
4843 'AT': 'Austria',
4844 'AZ': 'Azerbaijan',
4845 'BS': 'Bahamas',
4846 'BH': 'Bahrain',
4847 'BD': 'Bangladesh',
4848 'BB': 'Barbados',
4849 'BY': 'Belarus',
4850 'BE': 'Belgium',
4851 'BZ': 'Belize',
4852 'BJ': 'Benin',
4853 'BM': 'Bermuda',
4854 'BT': 'Bhutan',
4855 'BO': 'Bolivia, Plurinational State of',
4856 'BQ': 'Bonaire, Sint Eustatius and Saba',
4857 'BA': 'Bosnia and Herzegovina',
4858 'BW': 'Botswana',
4859 'BV': 'Bouvet Island',
4860 'BR': 'Brazil',
4861 'IO': 'British Indian Ocean Territory',
4862 'BN': 'Brunei Darussalam',
4863 'BG': 'Bulgaria',
4864 'BF': 'Burkina Faso',
4865 'BI': 'Burundi',
4866 'KH': 'Cambodia',
4867 'CM': 'Cameroon',
4868 'CA': 'Canada',
4869 'CV': 'Cape Verde',
4870 'KY': 'Cayman Islands',
4871 'CF': 'Central African Republic',
4872 'TD': 'Chad',
4873 'CL': 'Chile',
4874 'CN': 'China',
4875 'CX': 'Christmas Island',
4876 'CC': 'Cocos (Keeling) Islands',
4877 'CO': 'Colombia',
4878 'KM': 'Comoros',
4879 'CG': 'Congo',
4880 'CD': 'Congo, the Democratic Republic of the',
4881 'CK': 'Cook Islands',
4882 'CR': 'Costa Rica',
4883 'CI': 'Côte d\'Ivoire',
4884 'HR': 'Croatia',
4885 'CU': 'Cuba',
4886 'CW': 'Curaçao',
4887 'CY': 'Cyprus',
4888 'CZ': 'Czech Republic',
4889 'DK': 'Denmark',
4890 'DJ': 'Djibouti',
4891 'DM': 'Dominica',
4892 'DO': 'Dominican Republic',
4893 'EC': 'Ecuador',
4894 'EG': 'Egypt',
4895 'SV': 'El Salvador',
4896 'GQ': 'Equatorial Guinea',
4897 'ER': 'Eritrea',
4898 'EE': 'Estonia',
4899 'ET': 'Ethiopia',
4900 'FK': 'Falkland Islands (Malvinas)',
4901 'FO': 'Faroe Islands',
4902 'FJ': 'Fiji',
4903 'FI': 'Finland',
4904 'FR': 'France',
4905 'GF': 'French Guiana',
4906 'PF': 'French Polynesia',
4907 'TF': 'French Southern Territories',
4908 'GA': 'Gabon',
4909 'GM': 'Gambia',
4910 'GE': 'Georgia',
4911 'DE': 'Germany',
4912 'GH': 'Ghana',
4913 'GI': 'Gibraltar',
4914 'GR': 'Greece',
4915 'GL': 'Greenland',
4916 'GD': 'Grenada',
4917 'GP': 'Guadeloupe',
4918 'GU': 'Guam',
4919 'GT': 'Guatemala',
4920 'GG': 'Guernsey',
4921 'GN': 'Guinea',
4922 'GW': 'Guinea-Bissau',
4923 'GY': 'Guyana',
4924 'HT': 'Haiti',
4925 'HM': 'Heard Island and McDonald Islands',
4926 'VA': 'Holy See (Vatican City State)',
4927 'HN': 'Honduras',
4928 'HK': 'Hong Kong',
4929 'HU': 'Hungary',
4930 'IS': 'Iceland',
4931 'IN': 'India',
4932 'ID': 'Indonesia',
4933 'IR': 'Iran, Islamic Republic of',
4934 'IQ': 'Iraq',
4935 'IE': 'Ireland',
4936 'IM': 'Isle of Man',
4937 'IL': 'Israel',
4938 'IT': 'Italy',
4939 'JM': 'Jamaica',
4940 'JP': 'Japan',
4941 'JE': 'Jersey',
4942 'JO': 'Jordan',
4943 'KZ': 'Kazakhstan',
4944 'KE': 'Kenya',
4945 'KI': 'Kiribati',
4946 'KP': 'Korea, Democratic People\'s Republic of',
4947 'KR': 'Korea, Republic of',
4948 'KW': 'Kuwait',
4949 'KG': 'Kyrgyzstan',
4950 'LA': 'Lao People\'s Democratic Republic',
4951 'LV': 'Latvia',
4952 'LB': 'Lebanon',
4953 'LS': 'Lesotho',
4954 'LR': 'Liberia',
4955 'LY': 'Libya',
4956 'LI': 'Liechtenstein',
4957 'LT': 'Lithuania',
4958 'LU': 'Luxembourg',
4959 'MO': 'Macao',
4960 'MK': 'Macedonia, the Former Yugoslav Republic of',
4961 'MG': 'Madagascar',
4962 'MW': 'Malawi',
4963 'MY': 'Malaysia',
4964 'MV': 'Maldives',
4965 'ML': 'Mali',
4966 'MT': 'Malta',
4967 'MH': 'Marshall Islands',
4968 'MQ': 'Martinique',
4969 'MR': 'Mauritania',
4970 'MU': 'Mauritius',
4971 'YT': 'Mayotte',
4972 'MX': 'Mexico',
4973 'FM': 'Micronesia, Federated States of',
4974 'MD': 'Moldova, Republic of',
4975 'MC': 'Monaco',
4976 'MN': 'Mongolia',
4977 'ME': 'Montenegro',
4978 'MS': 'Montserrat',
4979 'MA': 'Morocco',
4980 'MZ': 'Mozambique',
4981 'MM': 'Myanmar',
4982 'NA': 'Namibia',
4983 'NR': 'Nauru',
4984 'NP': 'Nepal',
4985 'NL': 'Netherlands',
4986 'NC': 'New Caledonia',
4987 'NZ': 'New Zealand',
4988 'NI': 'Nicaragua',
4989 'NE': 'Niger',
4990 'NG': 'Nigeria',
4991 'NU': 'Niue',
4992 'NF': 'Norfolk Island',
4993 'MP': 'Northern Mariana Islands',
4994 'NO': 'Norway',
4995 'OM': 'Oman',
4996 'PK': 'Pakistan',
4997 'PW': 'Palau',
4998 'PS': 'Palestine, State of',
4999 'PA': 'Panama',
5000 'PG': 'Papua New Guinea',
5001 'PY': 'Paraguay',
5002 'PE': 'Peru',
5003 'PH': 'Philippines',
5004 'PN': 'Pitcairn',
5005 'PL': 'Poland',
5006 'PT': 'Portugal',
5007 'PR': 'Puerto Rico',
5008 'QA': 'Qatar',
5009 'RE': 'Réunion',
5010 'RO': 'Romania',
5011 'RU': 'Russian Federation',
5012 'RW': 'Rwanda',
5013 'BL': 'Saint Barthélemy',
5014 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5015 'KN': 'Saint Kitts and Nevis',
5016 'LC': 'Saint Lucia',
5017 'MF': 'Saint Martin (French part)',
5018 'PM': 'Saint Pierre and Miquelon',
5019 'VC': 'Saint Vincent and the Grenadines',
5020 'WS': 'Samoa',
5021 'SM': 'San Marino',
5022 'ST': 'Sao Tome and Principe',
5023 'SA': 'Saudi Arabia',
5024 'SN': 'Senegal',
5025 'RS': 'Serbia',
5026 'SC': 'Seychelles',
5027 'SL': 'Sierra Leone',
5028 'SG': 'Singapore',
5029 'SX': 'Sint Maarten (Dutch part)',
5030 'SK': 'Slovakia',
5031 'SI': 'Slovenia',
5032 'SB': 'Solomon Islands',
5033 'SO': 'Somalia',
5034 'ZA': 'South Africa',
5035 'GS': 'South Georgia and the South Sandwich Islands',
5036 'SS': 'South Sudan',
5037 'ES': 'Spain',
5038 'LK': 'Sri Lanka',
5039 'SD': 'Sudan',
5040 'SR': 'Suriname',
5041 'SJ': 'Svalbard and Jan Mayen',
5042 'SZ': 'Swaziland',
5043 'SE': 'Sweden',
5044 'CH': 'Switzerland',
5045 'SY': 'Syrian Arab Republic',
5046 'TW': 'Taiwan, Province of China',
5047 'TJ': 'Tajikistan',
5048 'TZ': 'Tanzania, United Republic of',
5049 'TH': 'Thailand',
5050 'TL': 'Timor-Leste',
5051 'TG': 'Togo',
5052 'TK': 'Tokelau',
5053 'TO': 'Tonga',
5054 'TT': 'Trinidad and Tobago',
5055 'TN': 'Tunisia',
5056 'TR': 'Turkey',
5057 'TM': 'Turkmenistan',
5058 'TC': 'Turks and Caicos Islands',
5059 'TV': 'Tuvalu',
5060 'UG': 'Uganda',
5061 'UA': 'Ukraine',
5062 'AE': 'United Arab Emirates',
5063 'GB': 'United Kingdom',
5064 'US': 'United States',
5065 'UM': 'United States Minor Outlying Islands',
5066 'UY': 'Uruguay',
5067 'UZ': 'Uzbekistan',
5068 'VU': 'Vanuatu',
5069 'VE': 'Venezuela, Bolivarian Republic of',
5070 'VN': 'Viet Nam',
5071 'VG': 'Virgin Islands, British',
5072 'VI': 'Virgin Islands, U.S.',
5073 'WF': 'Wallis and Futuna',
5074 'EH': 'Western Sahara',
5075 'YE': 'Yemen',
5076 'ZM': 'Zambia',
5077 'ZW': 'Zimbabwe',
5078 }
5079
5080 @classmethod
5081 def short2full(cls, code):
5082 """Convert an ISO 3166-2 country code to the corresponding full name"""
5083 return cls._country_map.get(code.upper())
5084
5085
5086 class GeoUtils(object):
5087 # Major IPv4 address blocks per country
5088 _country_ip_map = {
5089 'AD': '46.172.224.0/19',
5090 'AE': '94.200.0.0/13',
5091 'AF': '149.54.0.0/17',
5092 'AG': '209.59.64.0/18',
5093 'AI': '204.14.248.0/21',
5094 'AL': '46.99.0.0/16',
5095 'AM': '46.70.0.0/15',
5096 'AO': '105.168.0.0/13',
5097 'AP': '182.50.184.0/21',
5098 'AQ': '23.154.160.0/24',
5099 'AR': '181.0.0.0/12',
5100 'AS': '202.70.112.0/20',
5101 'AT': '77.116.0.0/14',
5102 'AU': '1.128.0.0/11',
5103 'AW': '181.41.0.0/18',
5104 'AX': '185.217.4.0/22',
5105 'AZ': '5.197.0.0/16',
5106 'BA': '31.176.128.0/17',
5107 'BB': '65.48.128.0/17',
5108 'BD': '114.130.0.0/16',
5109 'BE': '57.0.0.0/8',
5110 'BF': '102.178.0.0/15',
5111 'BG': '95.42.0.0/15',
5112 'BH': '37.131.0.0/17',
5113 'BI': '154.117.192.0/18',
5114 'BJ': '137.255.0.0/16',
5115 'BL': '185.212.72.0/23',
5116 'BM': '196.12.64.0/18',
5117 'BN': '156.31.0.0/16',
5118 'BO': '161.56.0.0/16',
5119 'BQ': '161.0.80.0/20',
5120 'BR': '191.128.0.0/12',
5121 'BS': '24.51.64.0/18',
5122 'BT': '119.2.96.0/19',
5123 'BW': '168.167.0.0/16',
5124 'BY': '178.120.0.0/13',
5125 'BZ': '179.42.192.0/18',
5126 'CA': '99.224.0.0/11',
5127 'CD': '41.243.0.0/16',
5128 'CF': '197.242.176.0/21',
5129 'CG': '160.113.0.0/16',
5130 'CH': '85.0.0.0/13',
5131 'CI': '102.136.0.0/14',
5132 'CK': '202.65.32.0/19',
5133 'CL': '152.172.0.0/14',
5134 'CM': '102.244.0.0/14',
5135 'CN': '36.128.0.0/10',
5136 'CO': '181.240.0.0/12',
5137 'CR': '201.192.0.0/12',
5138 'CU': '152.206.0.0/15',
5139 'CV': '165.90.96.0/19',
5140 'CW': '190.88.128.0/17',
5141 'CY': '31.153.0.0/16',
5142 'CZ': '88.100.0.0/14',
5143 'DE': '53.0.0.0/8',
5144 'DJ': '197.241.0.0/17',
5145 'DK': '87.48.0.0/12',
5146 'DM': '192.243.48.0/20',
5147 'DO': '152.166.0.0/15',
5148 'DZ': '41.96.0.0/12',
5149 'EC': '186.68.0.0/15',
5150 'EE': '90.190.0.0/15',
5151 'EG': '156.160.0.0/11',
5152 'ER': '196.200.96.0/20',
5153 'ES': '88.0.0.0/11',
5154 'ET': '196.188.0.0/14',
5155 'EU': '2.16.0.0/13',
5156 'FI': '91.152.0.0/13',
5157 'FJ': '144.120.0.0/16',
5158 'FK': '80.73.208.0/21',
5159 'FM': '119.252.112.0/20',
5160 'FO': '88.85.32.0/19',
5161 'FR': '90.0.0.0/9',
5162 'GA': '41.158.0.0/15',
5163 'GB': '25.0.0.0/8',
5164 'GD': '74.122.88.0/21',
5165 'GE': '31.146.0.0/16',
5166 'GF': '161.22.64.0/18',
5167 'GG': '62.68.160.0/19',
5168 'GH': '154.160.0.0/12',
5169 'GI': '95.164.0.0/16',
5170 'GL': '88.83.0.0/19',
5171 'GM': '160.182.0.0/15',
5172 'GN': '197.149.192.0/18',
5173 'GP': '104.250.0.0/19',
5174 'GQ': '105.235.224.0/20',
5175 'GR': '94.64.0.0/13',
5176 'GT': '168.234.0.0/16',
5177 'GU': '168.123.0.0/16',
5178 'GW': '197.214.80.0/20',
5179 'GY': '181.41.64.0/18',
5180 'HK': '113.252.0.0/14',
5181 'HN': '181.210.0.0/16',
5182 'HR': '93.136.0.0/13',
5183 'HT': '148.102.128.0/17',
5184 'HU': '84.0.0.0/14',
5185 'ID': '39.192.0.0/10',
5186 'IE': '87.32.0.0/12',
5187 'IL': '79.176.0.0/13',
5188 'IM': '5.62.80.0/20',
5189 'IN': '117.192.0.0/10',
5190 'IO': '203.83.48.0/21',
5191 'IQ': '37.236.0.0/14',
5192 'IR': '2.176.0.0/12',
5193 'IS': '82.221.0.0/16',
5194 'IT': '79.0.0.0/10',
5195 'JE': '87.244.64.0/18',
5196 'JM': '72.27.0.0/17',
5197 'JO': '176.29.0.0/16',
5198 'JP': '133.0.0.0/8',
5199 'KE': '105.48.0.0/12',
5200 'KG': '158.181.128.0/17',
5201 'KH': '36.37.128.0/17',
5202 'KI': '103.25.140.0/22',
5203 'KM': '197.255.224.0/20',
5204 'KN': '198.167.192.0/19',
5205 'KP': '175.45.176.0/22',
5206 'KR': '175.192.0.0/10',
5207 'KW': '37.36.0.0/14',
5208 'KY': '64.96.0.0/15',
5209 'KZ': '2.72.0.0/13',
5210 'LA': '115.84.64.0/18',
5211 'LB': '178.135.0.0/16',
5212 'LC': '24.92.144.0/20',
5213 'LI': '82.117.0.0/19',
5214 'LK': '112.134.0.0/15',
5215 'LR': '102.183.0.0/16',
5216 'LS': '129.232.0.0/17',
5217 'LT': '78.56.0.0/13',
5218 'LU': '188.42.0.0/16',
5219 'LV': '46.109.0.0/16',
5220 'LY': '41.252.0.0/14',
5221 'MA': '105.128.0.0/11',
5222 'MC': '88.209.64.0/18',
5223 'MD': '37.246.0.0/16',
5224 'ME': '178.175.0.0/17',
5225 'MF': '74.112.232.0/21',
5226 'MG': '154.126.0.0/17',
5227 'MH': '117.103.88.0/21',
5228 'MK': '77.28.0.0/15',
5229 'ML': '154.118.128.0/18',
5230 'MM': '37.111.0.0/17',
5231 'MN': '49.0.128.0/17',
5232 'MO': '60.246.0.0/16',
5233 'MP': '202.88.64.0/20',
5234 'MQ': '109.203.224.0/19',
5235 'MR': '41.188.64.0/18',
5236 'MS': '208.90.112.0/22',
5237 'MT': '46.11.0.0/16',
5238 'MU': '105.16.0.0/12',
5239 'MV': '27.114.128.0/18',
5240 'MW': '102.70.0.0/15',
5241 'MX': '187.192.0.0/11',
5242 'MY': '175.136.0.0/13',
5243 'MZ': '197.218.0.0/15',
5244 'NA': '41.182.0.0/16',
5245 'NC': '101.101.0.0/18',
5246 'NE': '197.214.0.0/18',
5247 'NF': '203.17.240.0/22',
5248 'NG': '105.112.0.0/12',
5249 'NI': '186.76.0.0/15',
5250 'NL': '145.96.0.0/11',
5251 'NO': '84.208.0.0/13',
5252 'NP': '36.252.0.0/15',
5253 'NR': '203.98.224.0/19',
5254 'NU': '49.156.48.0/22',
5255 'NZ': '49.224.0.0/14',
5256 'OM': '5.36.0.0/15',
5257 'PA': '186.72.0.0/15',
5258 'PE': '186.160.0.0/14',
5259 'PF': '123.50.64.0/18',
5260 'PG': '124.240.192.0/19',
5261 'PH': '49.144.0.0/13',
5262 'PK': '39.32.0.0/11',
5263 'PL': '83.0.0.0/11',
5264 'PM': '70.36.0.0/20',
5265 'PR': '66.50.0.0/16',
5266 'PS': '188.161.0.0/16',
5267 'PT': '85.240.0.0/13',
5268 'PW': '202.124.224.0/20',
5269 'PY': '181.120.0.0/14',
5270 'QA': '37.210.0.0/15',
5271 'RE': '102.35.0.0/16',
5272 'RO': '79.112.0.0/13',
5273 'RS': '93.86.0.0/15',
5274 'RU': '5.136.0.0/13',
5275 'RW': '41.186.0.0/16',
5276 'SA': '188.48.0.0/13',
5277 'SB': '202.1.160.0/19',
5278 'SC': '154.192.0.0/11',
5279 'SD': '102.120.0.0/13',
5280 'SE': '78.64.0.0/12',
5281 'SG': '8.128.0.0/10',
5282 'SI': '188.196.0.0/14',
5283 'SK': '78.98.0.0/15',
5284 'SL': '102.143.0.0/17',
5285 'SM': '89.186.32.0/19',
5286 'SN': '41.82.0.0/15',
5287 'SO': '154.115.192.0/18',
5288 'SR': '186.179.128.0/17',
5289 'SS': '105.235.208.0/21',
5290 'ST': '197.159.160.0/19',
5291 'SV': '168.243.0.0/16',
5292 'SX': '190.102.0.0/20',
5293 'SY': '5.0.0.0/16',
5294 'SZ': '41.84.224.0/19',
5295 'TC': '65.255.48.0/20',
5296 'TD': '154.68.128.0/19',
5297 'TG': '196.168.0.0/14',
5298 'TH': '171.96.0.0/13',
5299 'TJ': '85.9.128.0/18',
5300 'TK': '27.96.24.0/21',
5301 'TL': '180.189.160.0/20',
5302 'TM': '95.85.96.0/19',
5303 'TN': '197.0.0.0/11',
5304 'TO': '175.176.144.0/21',
5305 'TR': '78.160.0.0/11',
5306 'TT': '186.44.0.0/15',
5307 'TV': '202.2.96.0/19',
5308 'TW': '120.96.0.0/11',
5309 'TZ': '156.156.0.0/14',
5310 'UA': '37.52.0.0/14',
5311 'UG': '102.80.0.0/13',
5312 'US': '6.0.0.0/8',
5313 'UY': '167.56.0.0/13',
5314 'UZ': '84.54.64.0/18',
5315 'VA': '212.77.0.0/19',
5316 'VC': '207.191.240.0/21',
5317 'VE': '186.88.0.0/13',
5318 'VG': '66.81.192.0/20',
5319 'VI': '146.226.0.0/16',
5320 'VN': '14.160.0.0/11',
5321 'VU': '202.80.32.0/20',
5322 'WF': '117.20.32.0/21',
5323 'WS': '202.4.32.0/19',
5324 'YE': '134.35.0.0/16',
5325 'YT': '41.242.116.0/22',
5326 'ZA': '41.0.0.0/11',
5327 'ZM': '102.144.0.0/13',
5328 'ZW': '102.177.192.0/18',
5329 }
5330
5331 @classmethod
5332 def random_ipv4(cls, code_or_block):
5333 if len(code_or_block) == 2:
5334 block = cls._country_ip_map.get(code_or_block.upper())
5335 if not block:
5336 return None
5337 else:
5338 block = code_or_block
5339 addr, preflen = block.split('/')
5340 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5341 addr_max = addr_min | (0xffffffff >> int(preflen))
5342 return compat_str(socket.inet_ntoa(
5343 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5344
5345
5346 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
5347 def __init__(self, proxies=None):
5348 # Set default handlers
5349 for type in ('http', 'https'):
5350 setattr(self, '%s_open' % type,
5351 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5352 meth(r, proxy, type))
5353 compat_urllib_request.ProxyHandler.__init__(self, proxies)
5354
5355 def proxy_open(self, req, proxy, type):
5356 req_proxy = req.headers.get('Ytdl-request-proxy')
5357 if req_proxy is not None:
5358 proxy = req_proxy
5359 del req.headers['Ytdl-request-proxy']
5360
5361 if proxy == '__noproxy__':
5362 return None # No Proxy
5363 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
5364 req.add_header('Ytdl-socks-proxy', proxy)
5365 # youtube-dlc's http/https handlers do wrapping the socket with socks
5366 return None
5367 return compat_urllib_request.ProxyHandler.proxy_open(
5368 self, req, proxy, type)
5369
5370
5371 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5372 # released into Public Domain
5373 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5374
5375 def long_to_bytes(n, blocksize=0):
5376 """long_to_bytes(n:long, blocksize:int) : string
5377 Convert a long integer to a byte string.
5378
5379 If optional blocksize is given and greater than zero, pad the front of the
5380 byte string with binary zeros so that the length is a multiple of
5381 blocksize.
5382 """
5383 # after much testing, this algorithm was deemed to be the fastest
5384 s = b''
5385 n = int(n)
5386 while n > 0:
5387 s = compat_struct_pack('>I', n & 0xffffffff) + s
5388 n = n >> 32
5389 # strip off leading zeros
5390 for i in range(len(s)):
5391 if s[i] != b'\000'[0]:
5392 break
5393 else:
5394 # only happens when n == 0
5395 s = b'\000'
5396 i = 0
5397 s = s[i:]
5398 # add back some pad bytes. this could be done more efficiently w.r.t. the
5399 # de-padding being done above, but sigh...
5400 if blocksize > 0 and len(s) % blocksize:
5401 s = (blocksize - len(s) % blocksize) * b'\000' + s
5402 return s
5403
5404
5405 def bytes_to_long(s):
5406 """bytes_to_long(string) : long
5407 Convert a byte string to a long integer.
5408
5409 This is (essentially) the inverse of long_to_bytes().
5410 """
5411 acc = 0
5412 length = len(s)
5413 if length % 4:
5414 extra = (4 - length % 4)
5415 s = b'\000' * extra + s
5416 length = length + extra
5417 for i in range(0, length, 4):
5418 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5419 return acc
5420
5421
5422 def ohdave_rsa_encrypt(data, exponent, modulus):
5423 '''
5424 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5425
5426 Input:
5427 data: data to encrypt, bytes-like object
5428 exponent, modulus: parameter e and N of RSA algorithm, both integer
5429 Output: hex string of encrypted data
5430
5431 Limitation: supports one block encryption only
5432 '''
5433
5434 payload = int(binascii.hexlify(data[::-1]), 16)
5435 encrypted = pow(payload, exponent, modulus)
5436 return '%x' % encrypted
5437
5438
5439 def pkcs1pad(data, length):
5440 """
5441 Padding input data with PKCS#1 scheme
5442
5443 @param {int[]} data input data
5444 @param {int} length target length
5445 @returns {int[]} padded data
5446 """
5447 if len(data) > length - 11:
5448 raise ValueError('Input data too long for PKCS#1 padding')
5449
5450 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5451 return [0, 2] + pseudo_random + [0] + data
5452
5453
5454 def encode_base_n(num, n, table=None):
5455 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5456 if not table:
5457 table = FULL_TABLE[:n]
5458
5459 if n > len(table):
5460 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5461
5462 if num == 0:
5463 return table[0]
5464
5465 ret = ''
5466 while num:
5467 ret = table[num % n] + ret
5468 num = num // n
5469 return ret
5470
5471
5472 def decode_packed_codes(code):
5473 mobj = re.search(PACKED_CODES_RE, code)
5474 obfucasted_code, base, count, symbols = mobj.groups()
5475 base = int(base)
5476 count = int(count)
5477 symbols = symbols.split('|')
5478 symbol_table = {}
5479
5480 while count:
5481 count -= 1
5482 base_n_count = encode_base_n(count, base)
5483 symbol_table[base_n_count] = symbols[count] or base_n_count
5484
5485 return re.sub(
5486 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
5487 obfucasted_code)
5488
5489
5490 def caesar(s, alphabet, shift):
5491 if shift == 0:
5492 return s
5493 l = len(alphabet)
5494 return ''.join(
5495 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5496 for c in s)
5497
5498
5499 def rot47(s):
5500 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5501
5502
5503 def parse_m3u8_attributes(attrib):
5504 info = {}
5505 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5506 if val.startswith('"'):
5507 val = val[1:-1]
5508 info[key] = val
5509 return info
5510
5511
5512 def urshift(val, n):
5513 return val >> n if val >= 0 else (val + 0x100000000) >> n
5514
5515
5516 # Based on png2str() written by @gdkchan and improved by @yokrysty
5517 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5518 def decode_png(png_data):
5519 # Reference: https://www.w3.org/TR/PNG/
5520 header = png_data[8:]
5521
5522 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5523 raise IOError('Not a valid PNG file.')
5524
5525 int_map = {1: '>B', 2: '>H', 4: '>I'}
5526 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5527
5528 chunks = []
5529
5530 while header:
5531 length = unpack_integer(header[:4])
5532 header = header[4:]
5533
5534 chunk_type = header[:4]
5535 header = header[4:]
5536
5537 chunk_data = header[:length]
5538 header = header[length:]
5539
5540 header = header[4:] # Skip CRC
5541
5542 chunks.append({
5543 'type': chunk_type,
5544 'length': length,
5545 'data': chunk_data
5546 })
5547
5548 ihdr = chunks[0]['data']
5549
5550 width = unpack_integer(ihdr[:4])
5551 height = unpack_integer(ihdr[4:8])
5552
5553 idat = b''
5554
5555 for chunk in chunks:
5556 if chunk['type'] == b'IDAT':
5557 idat += chunk['data']
5558
5559 if not idat:
5560 raise IOError('Unable to read PNG data.')
5561
5562 decompressed_data = bytearray(zlib.decompress(idat))
5563
5564 stride = width * 3
5565 pixels = []
5566
5567 def _get_pixel(idx):
5568 x = idx % stride
5569 y = idx // stride
5570 return pixels[y][x]
5571
5572 for y in range(height):
5573 basePos = y * (1 + stride)
5574 filter_type = decompressed_data[basePos]
5575
5576 current_row = []
5577
5578 pixels.append(current_row)
5579
5580 for x in range(stride):
5581 color = decompressed_data[1 + basePos + x]
5582 basex = y * stride + x
5583 left = 0
5584 up = 0
5585
5586 if x > 2:
5587 left = _get_pixel(basex - 3)
5588 if y > 0:
5589 up = _get_pixel(basex - stride)
5590
5591 if filter_type == 1: # Sub
5592 color = (color + left) & 0xff
5593 elif filter_type == 2: # Up
5594 color = (color + up) & 0xff
5595 elif filter_type == 3: # Average
5596 color = (color + ((left + up) >> 1)) & 0xff
5597 elif filter_type == 4: # Paeth
5598 a = left
5599 b = up
5600 c = 0
5601
5602 if x > 2 and y > 0:
5603 c = _get_pixel(basex - stride - 3)
5604
5605 p = a + b - c
5606
5607 pa = abs(p - a)
5608 pb = abs(p - b)
5609 pc = abs(p - c)
5610
5611 if pa <= pb and pa <= pc:
5612 color = (color + a) & 0xff
5613 elif pb <= pc:
5614 color = (color + b) & 0xff
5615 else:
5616 color = (color + c) & 0xff
5617
5618 current_row.append(color)
5619
5620 return width, height, pixels
5621
5622
5623 def write_xattr(path, key, value):
5624 # This mess below finds the best xattr tool for the job
5625 try:
5626 # try the pyxattr module...
5627 import xattr
5628
5629 if hasattr(xattr, 'set'): # pyxattr
5630 # Unicode arguments are not supported in python-pyxattr until
5631 # version 0.5.0
5632 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5633 pyxattr_required_version = '0.5.0'
5634 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5635 # TODO: fallback to CLI tools
5636 raise XAttrUnavailableError(
5637 'python-pyxattr is detected but is too old. '
5638 'youtube-dlc requires %s or above while your version is %s. '
5639 'Falling back to other xattr implementations' % (
5640 pyxattr_required_version, xattr.__version__))
5641
5642 setxattr = xattr.set
5643 else: # xattr
5644 setxattr = xattr.setxattr
5645
5646 try:
5647 setxattr(path, key, value)
5648 except EnvironmentError as e:
5649 raise XAttrMetadataError(e.errno, e.strerror)
5650
5651 except ImportError:
5652 if compat_os_name == 'nt':
5653 # Write xattrs to NTFS Alternate Data Streams:
5654 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5655 assert ':' not in key
5656 assert os.path.exists(path)
5657
5658 ads_fn = path + ':' + key
5659 try:
5660 with open(ads_fn, 'wb') as f:
5661 f.write(value)
5662 except EnvironmentError as e:
5663 raise XAttrMetadataError(e.errno, e.strerror)
5664 else:
5665 user_has_setfattr = check_executable('setfattr', ['--version'])
5666 user_has_xattr = check_executable('xattr', ['-h'])
5667
5668 if user_has_setfattr or user_has_xattr:
5669
5670 value = value.decode('utf-8')
5671 if user_has_setfattr:
5672 executable = 'setfattr'
5673 opts = ['-n', key, '-v', value]
5674 elif user_has_xattr:
5675 executable = 'xattr'
5676 opts = ['-w', key, value]
5677
5678 cmd = ([encodeFilename(executable, True)]
5679 + [encodeArgument(o) for o in opts]
5680 + [encodeFilename(path, True)])
5681
5682 try:
5683 p = subprocess.Popen(
5684 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5685 except EnvironmentError as e:
5686 raise XAttrMetadataError(e.errno, e.strerror)
5687 stdout, stderr = p.communicate()
5688 stderr = stderr.decode('utf-8', 'replace')
5689 if p.returncode != 0:
5690 raise XAttrMetadataError(p.returncode, stderr)
5691
5692 else:
5693 # On Unix, and can't find pyxattr, setfattr, or xattr.
5694 if sys.platform.startswith('linux'):
5695 raise XAttrUnavailableError(
5696 "Couldn't find a tool to set the xattrs. "
5697 "Install either the python 'pyxattr' or 'xattr' "
5698 "modules, or the GNU 'attr' package "
5699 "(which contains the 'setfattr' tool).")
5700 else:
5701 raise XAttrUnavailableError(
5702 "Couldn't find a tool to set the xattrs. "
5703 "Install either the python 'xattr' module, "
5704 "or the 'xattr' binary.")
5705
5706
5707 def random_birthday(year_field, month_field, day_field):
5708 start_date = datetime.date(1950, 1, 1)
5709 end_date = datetime.date(1995, 12, 31)
5710 offset = random.randint(0, (end_date - start_date).days)
5711 random_date = start_date + datetime.timedelta(offset)
5712 return {
5713 year_field: str(random_date.year),
5714 month_field: str(random_date.month),
5715 day_field: str(random_date.day),
5716 }