jfr.im git - yt-dlp.git/commitdiff
merge youtube-dl master 22.09.2020
authorUnknown <redacted>
Tue, 22 Sep 2020 14:09:54 +0000 (16:09 +0200)
committerUnknown <redacted>
Tue, 22 Sep 2020 14:09:54 +0000 (16:09 +0200)
1  2 
youtube_dlc/downloader/hls.py
youtube_dlc/downloader/http.py
youtube_dlc/extractor/common.py
youtube_dlc/extractor/pornhub.py
youtube_dlc/extractor/redtube.py
youtube_dlc/extractor/telequebec.py
youtube_dlc/extractor/twitch.py

index 84bc349288052fe15e92be61e72645db007003d0,0f2c06f40414fd94154c69d2aad365880dcb0e20..0f2c06f40414fd94154c69d2aad365880dcb0e20
@@@ -141,7 -141,7 +141,7 @@@ class HlsFD(FragmentFD)
                      count = 0
                      headers = info_dict.get('http_headers', {})
                      if byte_range:
-                         headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
+                         headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
                      while count <= fragment_retries:
                          try:
                              success, frag_content = self._download_fragment(
index 5046878dfcd874013e737e85d32764a95737406e,96379caf1ec2f462c7aadfaa02589778ba07e548..96379caf1ec2f462c7aadfaa02589778ba07e548
@@@ -106,7 -106,12 +106,12 @@@ class HttpFD(FileDownloader)
                  set_range(request, range_start, range_end)
              # Establish connection
              try:
-                 ctx.data = self.ydl.urlopen(request)
+                 try:
+                     ctx.data = self.ydl.urlopen(request)
+                 except (compat_urllib_error.URLError, ) as err:
+                     if isinstance(err.reason, socket.timeout):
+                         raise RetryDownload(err)
+                     raise err
                  # When trying to resume, Content-Range HTTP header of response has to be checked
                  # to match the value of requested Range HTTP header. This is due to a webservers
                  # that don't support resuming and serve a whole file with no Content-Range
  
              def retry(e):
                  to_stdout = ctx.tmpfilename == '-'
-                 if not to_stdout:
-                     ctx.stream.close()
-                 ctx.stream = None
+                 if ctx.stream is not None:
+                     if not to_stdout:
+                         ctx.stream.close()
+                     ctx.stream = None
                  ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
                  raise RetryDownload(e)
  
                  except socket.timeout as e:
                      retry(e)
                  except socket.error as e:
-                     if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
-                         raise
-                     retry(e)
+                     # SSLError on python 2 (inherits socket.error) may have
+                     # no errno set but this error message
+                     if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
+                         retry(e)
+                     raise
  
                  byte_counter += len(data_block)
  
index 310229d5740cb2a2cf529c656aec298fa137c1f1,021945a89e16c80262728a4a9a400674735389a7..4b42d699f8d2ae1dc8142982c34e532d1684fdce
@@@ -10,6 -10,7 +10,7 @@@ import o
  import random
  import re
  import socket
+ import ssl
  import sys
  import time
  import math
@@@ -67,6 -68,7 +68,7 @@@ from ..utils import 
      sanitized_Request,
      sanitize_filename,
      str_or_none,
+     str_to_int,
      strip_or_none,
      unescapeHTML,
      unified_strdate,
@@@ -269,7 -271,7 +271,7 @@@ class InfoExtractor(object)
                                       Set to "root" to indicate that this is a
                                       comment to the original video.
      age_limit:      Age restriction for the video, as an integer (years)
 -    webpage_url:    The URL to the video webpage, if given to youtube-dl it
 +    webpage_url:    The URL to the video webpage, if given to youtube-dlc it
                      should allow to get the same result again. (It will be set
                      by YoutubeDL if it's missing)
      categories:     A list of categories that the video falls in, for example
                  url_or_request = update_url_query(url_or_request, query)
              if data is not None or headers:
                  url_or_request = sanitized_Request(url_or_request, data, headers)
+         exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
+         if hasattr(ssl, 'CertificateError'):
+             exceptions.append(ssl.CertificateError)
          try:
              return self._downloader.urlopen(url_or_request)
-         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+         except tuple(exceptions) as err:
              if isinstance(err, compat_urllib_error.HTTPError):
                  if self.__can_accept_status_code(err, expected_status):
                      # Retain reference to error to prevent file object from
                  interaction_type = is_e.get('interactionType')
                  if not isinstance(interaction_type, compat_str):
                      continue
-                 interaction_count = int_or_none(is_e.get('userInteractionCount'))
+                 # For interaction count some sites provide string instead of
+                 # an integer (as per spec) with non digit characters (e.g. ",")
+                 # so extracting count with more relaxed str_to_int
+                 interaction_count = str_to_int(is_e.get('userInteractionCount'))
                  if interaction_count is None:
                      continue
                  count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
                  'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
                  'duration': parse_duration(e.get('duration')),
                  'timestamp': unified_timestamp(e.get('uploadDate')),
+                 'uploader': str_or_none(e.get('author')),
                  'filesize': float_or_none(e.get('contentSize')),
                  'tbr': int_or_none(e.get('bitrate')),
                  'width': int_or_none(e.get('width')),
          if not isinstance(manifest, compat_etree_Element) and not fatal:
              return []
  
 -        # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
 +        # currently youtube-dlc cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
          akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
          if akamai_pv is not None and ';' in akamai_pv.text:
              playerVerificationChallenge = akamai_pv.text.split(';')[0]
              http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
           2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
          """
 -        if mpd_doc.get('type') == 'dynamic':
 -            return []
 +        if not self._downloader.params.get('dynamic_mpd'):
 +            if mpd_doc.get('type') == 'dynamic':
 +                return []
  
          namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
  
index 3567a32839eef2f75123a3f1b939038cf3eaf678,529f3f7119fd4e93a8c82168d910ab4fc3d1720e..529f3f7119fd4e93a8c82168d910ab4fc3d1720e
@@@ -17,6 -17,7 +17,7 @@@ from ..utils import 
      determine_ext,
      ExtractorError,
      int_or_none,
+     merge_dicts,
      NO_DEFAULT,
      orderedSet,
      remove_quotes,
@@@ -59,13 -60,14 +60,14 @@@ class PornHubIE(PornHubBaseIE)
                      '''
      _TESTS = [{
          'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
-         'md5': '1e19b41231a02eba417839222ac9d58e',
+         'md5': 'a6391306d050e4547f62b3f485dd9ba9',
          'info_dict': {
              'id': '648719015',
              'ext': 'mp4',
              'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
              'uploader': 'Babes',
              'upload_date': '20130628',
+             'timestamp': 1372447216,
              'duration': 361,
              'view_count': int,
              'like_count': int,
@@@ -82,8 -84,8 +84,8 @@@
              'id': '1331683002',
              'ext': 'mp4',
              'title': '重庆婷婷女王足交',
-             'uploader': 'Unknown',
              'upload_date': '20150213',
+             'timestamp': 1423804862,
              'duration': 1753,
              'view_count': int,
              'like_count': int,
          'params': {
              'skip_download': True,
          },
+         'skip': 'This video has been disabled',
      }, {
          'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
          'only_matching': True,
  
          video_uploader = self._html_search_regex(
              r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
-             webpage, 'uploader', fatal=False)
+             webpage, 'uploader', default=None)
  
          view_count = self._extract_count(
-             r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+             r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
          like_count = self._extract_count(
              r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
          dislike_count = self._extract_count(
              if div:
                  return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
  
-         return {
+         info = self._search_json_ld(webpage, video_id, default={})
+         # description provided in JSON-LD is irrelevant
+         info['description'] = None
+         return merge_dicts({
              'id': video_id,
              'uploader': video_uploader,
              'upload_date': upload_date,
              'tags': extract_list('tags'),
              'categories': extract_list('categories'),
              'subtitles': subtitles,
-         }
+         }, info)
  
  
  class PornHubPlaylistBaseIE(PornHubBaseIE):
index 2d2f6a98c97dba8605cb9f640c7c73d860caa1d0,a1ca791caaa165b520f121c5969b7f0ab6173acc..a1ca791caaa165b520f121c5969b7f0ab6173acc
@@@ -15,7 -15,7 +15,7 @@@ from ..utils import 
  
  
  class RedTubeIE(InfoExtractor):
-     _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
+     _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
      _TESTS = [{
          'url': 'http://www.redtube.com/66418',
          'md5': 'fc08071233725f26b8f014dba9590005',
@@@ -31,6 -31,9 +31,9 @@@
      }, {
          'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
          'only_matching': True,
+     }, {
+         'url': 'http://it.redtube.com/66418',
+         'only_matching': True,
      }]
  
      @staticmethod
index c82c94b3a0009da2cf0938c92910feec84de018b,b4c485b9be38d492401be9052fb1ddbbe461e4cc..b4c485b9be38d492401be9052fb1ddbbe461e4cc
@@@ -13,14 -13,24 +13,24 @@@ from ..utils import 
  
  class TeleQuebecBaseIE(InfoExtractor):
      @staticmethod
-     def _limelight_result(media_id):
+     def _result(url, ie_key):
          return {
              '_type': 'url_transparent',
-             'url': smuggle_url(
-                 'limelight:media:' + media_id, {'geo_countries': ['CA']}),
-             'ie_key': 'LimelightMedia',
+             'url': smuggle_url(url, {'geo_countries': ['CA']}),
+             'ie_key': ie_key,
          }
  
+     @staticmethod
+     def _limelight_result(media_id):
+         return TeleQuebecBaseIE._result(
+             'limelight:media:' + media_id, 'LimelightMedia')
+     @staticmethod
+     def _brightcove_result(brightcove_id):
+         return TeleQuebecBaseIE._result(
+             'http://players.brightcove.net/6150020952001/default_default/index.html?videoId=%s'
+             % brightcove_id, 'BrightcoveNew')
  
  class TeleQuebecIE(TeleQuebecBaseIE):
      _VALID_URL = r'''(?x)
              'id': '577116881b4b439084e6b1cf4ef8b1b3',
              'ext': 'mp4',
              'title': 'Un petit choc et puis repart!',
-             'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
+             'description': 'md5:067bc84bd6afecad85e69d1000730907',
+         },
+         'params': {
+             'skip_download': True,
+         },
+     }, {
+         'url': 'https://zonevideo.telequebec.tv/media/55267/le-soleil/passe-partout',
+         'info_dict': {
+             'id': '6167180337001',
+             'ext': 'mp4',
+             'title': 'Le soleil',
+             'description': 'md5:64289c922a8de2abbe99c354daffde02',
+             'uploader_id': '6150020952001',
+             'upload_date': '20200625',
+             'timestamp': 1593090307,
          },
          'params': {
+             'format': 'bestvideo',
              'skip_download': True,
          },
+         'add_ie': ['BrightcoveNew'],
      }, {
          # no description
          'url': 'http://zonevideo.telequebec.tv/media/30261',
              'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
              media_id)['media']
  
-         info = self._limelight_result(media_data['streamInfo']['sourceId'])
+         source_id = media_data['streamInfo']['sourceId']
+         source = (try_get(
+             media_data, lambda x: x['streamInfo']['source'],
+             compat_str) or 'limelight').lower()
+         if source == 'brightcove':
+             info = self._brightcove_result(source_id)
+         else:
+             info = self._limelight_result(source_id)
          info.update({
              'title': media_data.get('title'),
              'description': try_get(
index eadc48c6d88d4099c4bd5961d78f9bb5d717925d,ab665443271106415536dd6db63c1449cf195f6e..ab665443271106415536dd6db63c1449cf195f6e
@@@ -24,7 -24,6 +24,6 @@@ from ..utils import 
      parse_duration,
      parse_iso8601,
      qualities,
-     str_or_none,
      try_get,
      unified_timestamp,
      update_url_query,
@@@ -337,19 -336,27 +336,27 @@@ def _make_video_result(node)
  class TwitchGraphQLBaseIE(TwitchBaseIE):
      _PAGE_LIMIT = 100
  
-     def _download_gql(self, video_id, op, variables, sha256_hash, note, fatal=True):
+     _OPERATION_HASHES = {
+         'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
+         'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
+         'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
+         'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
+         'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
+         'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
+         'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
+     }
+     def _download_gql(self, video_id, ops, note, fatal=True):
+         for op in ops:
+             op['extensions'] = {
+                 'persistedQuery': {
+                     'version': 1,
+                     'sha256Hash': self._OPERATION_HASHES[op['operationName']],
+                 }
+             }
          return self._download_json(
              'https://gql.twitch.tv/gql', video_id, note,
-             data=json.dumps({
-                 'operationName': op,
-                 'variables': variables,
-                 'extensions': {
-                     'persistedQuery': {
-                         'version': 1,
-                         'sha256Hash': sha256_hash,
-                     }
-                 }
-             }).encode(),
+             data=json.dumps(ops).encode(),
              headers={
                  'Content-Type': 'text/plain;charset=UTF-8',
                  'Client-ID': self._CLIENT_ID,
@@@ -369,14 -376,15 +376,15 @@@ class TwitchCollectionIE(TwitchGraphQLB
      }]
  
      _OPERATION_NAME = 'CollectionSideBar'
-     _SHA256_HASH = '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14'
  
      def _real_extract(self, url):
          collection_id = self._match_id(url)
          collection = self._download_gql(
-             collection_id, self._OPERATION_NAME,
-             {'collectionID': collection_id}, self._SHA256_HASH,
-             'Downloading collection GraphQL')['data']['collection']
+             collection_id, [{
+                 'operationName': self._OPERATION_NAME,
+                 'variables': {'collectionID': collection_id},
+             }],
+             'Downloading collection GraphQL')[0]['data']['collection']
          title = collection.get('title')
          entries = []
          for edge in collection['items']['edges']:
@@@ -403,14 -411,16 +411,16 @@@ class TwitchPlaylistBaseIE(TwitchGraphQ
              if cursor:
                  variables['cursor'] = cursor
              page = self._download_gql(
-                 channel_name, self._OPERATION_NAME, variables,
-                 self._SHA256_HASH,
+                 channel_name, [{
+                     'operationName': self._OPERATION_NAME,
+                     'variables': variables,
+                 }],
                  'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
                  fatal=False)
              if not page:
                  break
              edges = try_get(
-                 page, lambda x: x['data']['user'][entries_key]['edges'], list)
+                 page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
              if not edges:
                  break
              for edge in edges:
@@@ -553,7 -563,6 +563,6 @@@ class TwitchVideosIE(TwitchPlaylistBase
          'views': 'Popular',
      }
  
-     _SHA256_HASH = 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb'
      _OPERATION_NAME = 'FilterableVideoTower_Videos'
      _ENTRY_KIND = 'video'
      _EDGE_KIND = 'VideoEdge'
@@@ -622,7 -631,6 +631,6 @@@ class TwitchVideosClipsIE(TwitchPlaylis
      # NB: values other than 20 result in skipped videos
      _PAGE_LIMIT = 20
  
-     _SHA256_HASH = 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777'
      _OPERATION_NAME = 'ClipsCards__User'
      _ENTRY_KIND = 'clip'
      _EDGE_KIND = 'ClipEdge'
@@@ -680,7 -688,6 +688,6 @@@ class TwitchVideosCollectionsIE(TwitchP
          'playlist_mincount': 3,
      }]
  
-     _SHA256_HASH = '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84'
      _OPERATION_NAME = 'ChannelCollectionsContent'
      _ENTRY_KIND = 'collection'
      _EDGE_KIND = 'CollectionsItemEdge'
              playlist_title='%s - Collections' % channel_name)
  
  
- class TwitchStreamIE(TwitchBaseIE):
+ class TwitchStreamIE(TwitchGraphQLBaseIE):
      IE_NAME = 'twitch:stream'
      _VALID_URL = r'''(?x)
                      https?://
                  else super(TwitchStreamIE, cls).suitable(url))
  
      def _real_extract(self, url):
-         channel_name = self._match_id(url)
-         access_token = self._download_access_token(channel_name)
-         token = access_token['token']
-         channel_id = self._extract_channel_id(token, channel_name)
+         channel_name = self._match_id(url).lower()
+         gql = self._download_gql(
+             channel_name, [{
+                 'operationName': 'StreamMetadata',
+                 'variables': {'channelLogin': channel_name},
+             }, {
+                 'operationName': 'ComscoreStreamingQuery',
+                 'variables': {
+                     'channel': channel_name,
+                     'clipSlug': '',
+                     'isClip': False,
+                     'isLive': True,
+                     'isVodOrCollection': False,
+                     'vodID': '',
+                 },
+             }, {
+                 'operationName': 'VideoPreviewOverlay',
+                 'variables': {'login': channel_name},
+             }],
+             'Downloading stream GraphQL')
+         user = gql[0]['data']['user']
+         if not user:
+             raise ExtractorError(
+                 '%s does not exist' % channel_name, expected=True)
  
-         stream = self._call_api(
-             'kraken/streams/%s?stream_type=all' % channel_id,
-             channel_id, 'Downloading stream JSON').get('stream')
+         stream = user['stream']
  
          if not stream:
-             raise ExtractorError('%s is offline' % channel_id, expected=True)
+             raise ExtractorError('%s is offline' % channel_name, expected=True)
  
-         # Channel name may be typed if different case than the original channel name
-         # (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON) that will lead to constructing
-         # an invalid m3u8 URL. Working around by use of original channel name from stream
-         # JSON and fallback to lowercase if it's not available.
-         channel_name = try_get(
-             stream, lambda x: x['channel']['name'],
-             compat_str) or channel_name.lower()
+         access_token = self._download_access_token(channel_name)
+         token = access_token['token']
  
+         stream_id = stream.get('id') or channel_name
          query = {
              'allow_source': 'true',
              'allow_audio_only': 'true',
              'token': token.encode('utf-8'),
          }
          formats = self._extract_m3u8_formats(
-             '%s/api/channel/hls/%s.m3u8?%s'
-             % (self._USHER_BASE, channel_name, compat_urllib_parse_urlencode(query)),
-             channel_id, 'mp4')
+             '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name),
+             stream_id, 'mp4', query=query)
          self._prefer_source(formats)
  
          view_count = stream.get('viewers')
-         timestamp = parse_iso8601(stream.get('created_at'))
+         timestamp = unified_timestamp(stream.get('createdAt'))
  
-         channel = stream['channel']
-         title = self._live_title(channel.get('display_name') or channel.get('name'))
-         description = channel.get('status')
+         sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
+         uploader = sq_user.get('displayName')
+         description = try_get(
+             sq_user, lambda x: x['broadcastSettings']['title'], compat_str)
  
-         thumbnails = []
-         for thumbnail_key, thumbnail_url in stream['preview'].items():
-             m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
-             if not m:
-                 continue
-             thumbnails.append({
-                 'url': thumbnail_url,
-                 'width': int(m.group('width')),
-                 'height': int(m.group('height')),
-             })
+         thumbnail = url_or_none(try_get(
+             gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
+             compat_str))
+         title = uploader or channel_name
+         stream_type = stream.get('type')
+         if stream_type in ['rerun', 'live']:
+             title += ' (%s)' % stream_type
  
          return {
-             'id': str_or_none(stream.get('_id')) or channel_id,
+             'id': stream_id,
              'display_id': channel_name,
-             'title': title,
+             'title': self._live_title(title),
              'description': description,
-             'thumbnails': thumbnails,
-             'uploader': channel.get('display_name'),
-             'uploader_id': channel.get('name'),
+             'thumbnail': thumbnail,
+             'uploader': uploader,
+             'uploader_id': channel_name,
              'timestamp': timestamp,
              'view_count': view_count,
              'formats': formats,
-             'is_live': True,
+             'is_live': stream_type == 'live',
          }