youtube_dl/extractor/soundcloud.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import itertools
   5 import re
   6
   7 from .common import (
   8     InfoExtractor,
   9     SearchInfoExtractor
  10 )
  11 from ..compat import (
  12     compat_str,
  13     compat_urlparse,
  14 )
  15 from ..utils import (
  16     ExtractorError,
  17     float_or_none,
  18     HEADRequest,
  19     int_or_none,
  20     KNOWN_EXTENSIONS,
  21     mimetype2ext,
  22     str_or_none,
  23     try_get,
  24     unified_timestamp,
  25     update_url_query,
  26     url_or_none,
  27 )
  28
  29
  30 class SoundcloudEmbedIE(InfoExtractor):
  31     _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
  32     _TEST = {
  33         # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
  34         'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
  35         'only_matching': True,
  36     }
  37
  38     @staticmethod
  39     def _extract_urls(webpage):
  40         return [m.group('url') for m in re.finditer(
  41             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
  42             webpage)]
  43
  44     def _real_extract(self, url):
  45         query = compat_urlparse.parse_qs(
  46             compat_urlparse.urlparse(url).query)
  47         api_url = query['url'][0]
  48         secret_token = query.get('secret_token')
  49         if secret_token:
  50             api_url = update_url_query(api_url, {'secret_token': secret_token[0]})
  51         return self.url_result(api_url)
  52
  53
  54 class SoundcloudIE(InfoExtractor):
  55     """Information extractor for soundcloud.com
  56        To access the media, the uid of the song and a stream token
  57        must be extracted from the page source and the script must make
  58        a request to media.soundcloud.com/crossdomain.xml. Then
  59        the media can be grabbed by requesting from an url composed
  60        of the stream token and uid
  61      """
  62
  63     _VALID_URL = r'''(?x)^(?:https?://)?
  64                     (?:(?:(?:www\.|m\.)?soundcloud\.com/
  65                             (?!stations/track)
  66                             (?P<uploader>[\w\d-]+)/
  67                             (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
  68                             (?P<title>[\w\d-]+)/?
  69                             (?P<token>[^?]+?)?(?:[?].*)?$)
  70                        |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
  71                           (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
  72                     )
  73                     '''
  74     IE_NAME = 'soundcloud'
  75     _TESTS = [
  76         {
  77             'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
  78             'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
  79             'info_dict': {
  80                 'id': '62986583',
  81                 'ext': 'mp3',
  82                 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
  83                 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
  84                 'uploader': 'E.T. ExTerrestrial Music',
  85                 'uploader_id': '1571244',
  86                 'timestamp': 1349920598,
  87                 'upload_date': '20121011',
  88                 'duration': 143.216,
  89                 'license': 'all-rights-reserved',
  90                 'view_count': int,
  91                 'like_count': int,
  92                 'comment_count': int,
  93                 'repost_count': int,
  94             }
  95         },
  96         # not streamable song
  97         {
  98             'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
  99             'info_dict': {
 100                 'id': '47127627',
 101                 'ext': 'mp3',
 102                 'title': 'Goldrushed',
 103                 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
 104                 'uploader': 'The Royal Concept',
 105                 'uploader_id': '9615865',
 106                 'timestamp': 1337635207,
 107                 'upload_date': '20120521',
 108                 'duration': 30,
 109                 'license': 'all-rights-reserved',
 110                 'view_count': int,
 111                 'like_count': int,
 112                 'comment_count': int,
 113                 'repost_count': int,
 114             },
 115             'params': {
 116                 # rtmp
 117                 'skip_download': True,
 118             },
 119             'skip': 'Preview',
 120         },
 121         # private link
 122         {
 123             'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
 124             'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
 125             'info_dict': {
 126                 'id': '123998367',
 127                 'ext': 'mp3',
 128                 'title': 'Youtube - Dl Test Video \'\' Ä↭',
 129                 'description': 'test chars:  \"\'/\\ä↭',
 130                 'uploader': 'jaimeMF',
 131                 'uploader_id': '69767071',
 132                 'timestamp': 1386604920,
 133                 'upload_date': '20131209',
 134                 'duration': 9.927,
 135                 'license': 'all-rights-reserved',
 136                 'view_count': int,
 137                 'like_count': int,
 138                 'comment_count': int,
 139                 'repost_count': int,
 140             },
 141         },
 142         # private link (alt format)
 143         {
 144             'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
 145             'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
 146             'info_dict': {
 147                 'id': '123998367',
 148                 'ext': 'mp3',
 149                 'title': 'Youtube - Dl Test Video \'\' Ä↭',
 150                 'description': 'test chars:  \"\'/\\ä↭',
 151                 'uploader': 'jaimeMF',
 152                 'uploader_id': '69767071',
 153                 'timestamp': 1386604920,
 154                 'upload_date': '20131209',
 155                 'duration': 9.927,
 156                 'license': 'all-rights-reserved',
 157                 'view_count': int,
 158                 'like_count': int,
 159                 'comment_count': int,
 160                 'repost_count': int,
 161             },
 162         },
 163         # downloadable song
 164         {
 165             'url': 'https://soundcloud.com/oddsamples/bus-brakes',
 166             'md5': '7624f2351f8a3b2e7cd51522496e7631',
 167             'info_dict': {
 168                 'id': '128590877',
 169                 'ext': 'mp3',
 170                 'title': 'Bus Brakes',
 171                 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
 172                 'uploader': 'oddsamples',
 173                 'uploader_id': '73680509',
 174                 'timestamp': 1389232924,
 175                 'upload_date': '20140109',
 176                 'duration': 17.346,
 177                 'license': 'cc-by-sa',
 178                 'view_count': int,
 179                 'like_count': int,
 180                 'comment_count': int,
 181                 'repost_count': int,
 182             },
 183         },
 184         # private link, downloadable format
 185         {
 186             'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
 187             'md5': '64a60b16e617d41d0bef032b7f55441e',
 188             'info_dict': {
 189                 'id': '340344461',
 190                 'ext': 'wav',
 191                 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
 192                 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
 193                 'uploader': 'Ori Uplift Music',
 194                 'uploader_id': '12563093',
 195                 'timestamp': 1504206263,
 196                 'upload_date': '20170831',
 197                 'duration': 7449.096,
 198                 'license': 'all-rights-reserved',
 199                 'view_count': int,
 200                 'like_count': int,
 201                 'comment_count': int,
 202                 'repost_count': int,
 203             },
 204         },
 205         # no album art, use avatar pic for thumbnail
 206         {
 207             'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
 208             'md5': '59c7872bc44e5d99b7211891664760c2',
 209             'info_dict': {
 210                 'id': '309699954',
 211                 'ext': 'mp3',
 212                 'title': 'Sideways (Prod. Mad Real)',
 213                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
 214                 'uploader': 'garyvee',
 215                 'uploader_id': '2366352',
 216                 'timestamp': 1488152409,
 217                 'upload_date': '20170226',
 218                 'duration': 207.012,
 219                 'thumbnail': r're:https?://.*\.jpg',
 220                 'license': 'all-rights-reserved',
 221                 'view_count': int,
 222                 'like_count': int,
 223                 'comment_count': int,
 224                 'repost_count': int,
 225             },
 226             'params': {
 227                 'skip_download': True,
 228             },
 229         },
 230         # not available via api.soundcloud.com/i1/tracks/id/streams
 231         {
 232             'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
 233             'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
 234             'info_dict': {
 235                 'id': '583011102',
 236                 'ext': 'mp3',
 237                 'title': 'Mezzo Valzer',
 238                 'description': 'md5:4138d582f81866a530317bae316e8b61',
 239                 'uploader': 'Giovanni Sarani',
 240                 'uploader_id': '3352531',
 241                 'timestamp': 1551394171,
 242                 'upload_date': '20190228',
 243                 'duration': 180.157,
 244                 'thumbnail': r're:https?://.*\.jpg',
 245                 'license': 'all-rights-reserved',
 246                 'view_count': int,
 247                 'like_count': int,
 248                 'comment_count': int,
 249                 'repost_count': int,
 250             },
 251             'expected_warnings': ['Unable to download JSON metadata'],
 252         }
 253     ]
 254
    # Base URLs of the old (v1) API, the v2 API and the public website;
    # each ends with a slash so that paths can be appended directly.
    _API_BASE = 'https://api.soundcloud.com/'
    _API_V2_BASE = 'https://api-v2.soundcloud.com/'
    _BASE_URL = 'https://soundcloud.com/'
    # Sent as the client_id query parameter of API requests
    _CLIENT_ID = 'UW9ajvMgVdMMW3cdeBi8lPfN6dvOVGji'
    # Matches the "-<size id>.jpg" tail of an image URL so that the size id
    # can be replaced with any key of _ARTWORK_MAP
    _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'

    # Image size id -> edge length used as both width and height of the
    # resulting thumbnail; 0 means no dimensions are reported for that size.
    _ARTWORK_MAP = {
        'mini': 16,
        'tiny': 20,
        'small': 32,
        'badge': 47,
        't67x67': 67,
        'large': 100,
        't300x300': 300,
        'crop': 400,
        't500x500': 500,
        'original': 0,
    }
 273
 274     @classmethod
 275     def _resolv_url(cls, url):
 276         return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url + '&client_id=' + cls._CLIENT_ID
 277
 278     def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2):
 279         track_id = compat_str(info['id'])
 280         title = info['title']
 281         track_base_url = self._API_BASE + 'tracks/%s' % track_id
 282
 283         format_urls = set()
 284         formats = []
 285         query = {'client_id': self._CLIENT_ID}
 286         if secret_token:
 287             query['secret_token'] = secret_token
 288
 289         if info.get('downloadable') and info.get('has_downloads_left'):
 290             format_url = update_url_query(
 291                 info.get('download_url') or track_base_url + '/download', query)
 292             format_urls.add(format_url)
 293             if version == 2:
 294                 v1_info = self._download_json(
 295                     track_base_url, track_id, query=query, fatal=False) or {}
 296             else:
 297                 v1_info = info
 298             formats.append({
 299                 'format_id': 'download',
 300                 'ext': v1_info.get('original_format') or 'mp3',
 301                 'filesize': int_or_none(v1_info.get('original_content_size')),
 302                 'url': format_url,
 303                 'preference': 10,
 304             })
 305
 306         def invalid_url(url):
 307             return not url or url in format_urls or re.search(r'/(?:preview|playlist)/0/30/', url)
 308
 309         def add_format(f, protocol):
 310             mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
 311             if mobj:
 312                 for k, v in mobj.groupdict().items():
 313                     if not f.get(k):
 314                         f[k] = v
 315             format_id_list = []
 316             if protocol:
 317                 format_id_list.append(protocol)
 318             for k in ('ext', 'abr'):
 319                 v = f.get(k)
 320                 if v:
 321                     format_id_list.append(v)
 322             abr = f.get('abr')
 323             if abr:
 324                 f['abr'] = int(abr)
 325             f.update({
 326                 'format_id': '_'.join(format_id_list),
 327                 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
 328             })
 329             formats.append(f)
 330
 331         # New API
 332         transcodings = try_get(
 333             info, lambda x: x['media']['transcodings'], list) or []
 334         for t in transcodings:
 335             if not isinstance(t, dict):
 336                 continue
 337             format_url = url_or_none(t.get('url'))
 338             if not format_url or t.get('snipped') or '/preview/' in format_url:
 339                 continue
 340             stream = self._download_json(
 341                 format_url, track_id, query=query, fatal=False)
 342             if not isinstance(stream, dict):
 343                 continue
 344             stream_url = url_or_none(stream.get('url'))
 345             if invalid_url(stream_url):
 346                 continue
 347             format_urls.add(stream_url)
 348             stream_format = t.get('format') or {}
 349             protocol = stream_format.get('protocol')
 350             if protocol != 'hls' and '/hls' in format_url:
 351                 protocol = 'hls'
 352             ext = None
 353             preset = str_or_none(t.get('preset'))
 354             if preset:
 355                 ext = preset.split('_')[0]
 356             if ext not in KNOWN_EXTENSIONS:
 357                 ext = mimetype2ext(stream_format.get('mime_type'))
 358             add_format({
 359                 'url': stream_url,
 360                 'ext': ext,
 361             }, 'http' if protocol == 'progressive' else protocol)
 362
 363         if not formats:
 364             # Old API, does not work for some tracks (e.g.
 365             # https://soundcloud.com/giovannisarani/mezzo-valzer)
 366             # and might serve preview URLs (e.g.
 367             # http://www.soundcloud.com/snbrn/ele)
 368             format_dict = self._download_json(
 369                 track_base_url + '/streams', track_id,
 370                 'Downloading track url', query=query, fatal=False) or {}
 371
 372             for key, stream_url in format_dict.items():
 373                 if invalid_url(stream_url):
 374                     continue
 375                 format_urls.add(stream_url)
 376                 mobj = re.search(r'(http|hls)_([^_]+)_(\d+)_url', key)
 377                 if mobj:
 378                     protocol, ext, abr = mobj.groups()
 379                     add_format({
 380                         'abr': abr,
 381                         'ext': ext,
 382                         'url': stream_url,
 383                     }, protocol)
 384
 385         if not formats:
 386             # We fallback to the stream_url in the original info, this
 387             # cannot be always used, sometimes it can give an HTTP 404 error
 388             urlh = self._request_webpage(
 389                 HEADRequest(info.get('stream_url') or track_base_url + '/stream'),
 390                 track_id, query=query, fatal=False)
 391             if urlh:
 392                 stream_url = urlh.geturl()
 393                 if not invalid_url(stream_url):
 394                     add_format({'url': stream_url}, 'http')
 395
 396         for f in formats:
 397             f['vcodec'] = 'none'
 398
 399         self._sort_formats(formats)
 400
 401         user = info.get('user') or {}
 402
 403         thumbnails = []
 404         artwork_url = info.get('artwork_url')
 405         thumbnail = artwork_url or user.get('avatar_url')
 406         if isinstance(thumbnail, compat_str):
 407             if re.search(self._IMAGE_REPL_RE, thumbnail):
 408                 for image_id, size in self._ARTWORK_MAP.items():
 409                     i = {
 410                         'id': image_id,
 411                         'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
 412                     }
 413                     if image_id == 'tiny' and not artwork_url:
 414                         size = 18
 415                     elif image_id == 'original':
 416                         i['preference'] = 10
 417                     if size:
 418                         i.update({
 419                             'width': size,
 420                             'height': size,
 421                         })
 422                     thumbnails.append(i)
 423             else:
 424                 thumbnails = [{'url': thumbnail}]
 425
 426         def extract_count(key):
 427             return int_or_none(info.get('%s_count' % key))
 428
 429         return {
 430             'id': track_id,
 431             'uploader': user.get('username'),
 432             'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
 433             'uploader_url': user.get('permalink_url'),
 434             'timestamp': unified_timestamp(info.get('created_at')),
 435             'title': title,
 436             'description': info.get('description'),
 437             'thumbnails': thumbnails,
 438             'duration': float_or_none(info.get('duration'), 1000),
 439             'webpage_url': info.get('permalink_url'),
 440             'license': info.get('license'),
 441             'view_count': extract_count('playback'),
 442             'like_count': extract_count('favoritings') or extract_count('likes'),
 443             'comment_count': extract_count('comment'),
 444             'repost_count': extract_count('reposts'),
 445             'genre': info.get('genre'),
 446             'formats': formats
 447         }
 448
 449     def _real_extract(self, url):
 450         mobj = re.match(self._VALID_URL, url)
 451
 452         track_id = mobj.group('track_id')
 453
 454         query = {
 455             'client_id': self._CLIENT_ID,
 456         }
 457         if track_id:
 458             info_json_url = self._API_V2_BASE + 'tracks/' + track_id
 459             full_title = track_id
 460             token = mobj.group('secret_token')
 461             if token:
 462                 query['secret_token'] = token
 463         else:
 464             full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title')
 465             token = mobj.group('token')
 466             if token:
 467                 resolve_title += '/%s' % token
 468             info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
 469
 470         version = 2
 471         info = self._download_json(
 472             info_json_url, full_title, 'Downloading info JSON', query=query, fatal=False)
 473         if not info:
 474             info = self._download_json(
 475                 info_json_url.replace(self._API_V2_BASE, self._API_BASE),
 476                 full_title, 'Downloading info JSON', query=query)
 477             version = 1
 478
 479         return self._extract_info_dict(info, full_title, token, version)
 480
 481
 482 class SoundcloudPlaylistBaseIE(SoundcloudIE):
 483     def _extract_track_entries(self, tracks, token=None):
 484         entries = []
 485         for track in tracks:
 486             track_id = str_or_none(track.get('id'))
 487             url = track.get('permalink_url')
 488             if not url:
 489                 if not track_id:
 490                     continue
 491                 url = self._API_V2_BASE + 'tracks/' + track_id
 492                 if token:
 493                     url += '?secret_token=' + token
 494             entries.append(self.url_result(
 495                 url, SoundcloudIE.ie_key(), track_id))
 496         return entries
 497
 498
 499 class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
 500     _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
 501     IE_NAME = 'soundcloud:set'
 502     _TESTS = [{
 503         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
 504         'info_dict': {
 505             'id': '2284613',
 506             'title': 'The Royal Concept EP',
 507         },
 508         'playlist_mincount': 5,
 509     }, {
 510         'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
 511         'only_matching': True,
 512     }]
 513
 514     def _real_extract(self, url):
 515         mobj = re.match(self._VALID_URL, url)
 516
 517         full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title')
 518         token = mobj.group('token')
 519         if token:
 520             full_title += '/' + token
 521
 522         info = self._download_json(self._resolv_url(
 523             self._BASE_URL + full_title), full_title)
 524
 525         if 'errors' in info:
 526             msgs = (compat_str(err['error_message']) for err in info['errors'])
 527             raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
 528
 529         entries = self._extract_track_entries(info['tracks'], token)
 530
 531         return self.playlist_result(
 532             entries, str_or_none(info.get('id')), info.get('title'))
 533
 534
 535 class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
 536     def _extract_playlist(self, base_url, playlist_id, playlist_title):
 537         COMMON_QUERY = {
 538             'limit': 2000000000,
 539             'client_id': self._CLIENT_ID,
 540             'linked_partitioning': '1',
 541         }
 542
 543         query = COMMON_QUERY.copy()
 544         query['offset'] = 0
 545
 546         next_href = base_url
 547
 548         entries = []
 549         for i in itertools.count():
 550             response = self._download_json(
 551                 next_href, playlist_id,
 552                 'Downloading track page %s' % (i + 1), query=query)
 553
 554             collection = response['collection']
 555
 556             if not isinstance(collection, list):
 557                 collection = []
 558
 559             # Empty collection may be returned, in this case we proceed
 560             # straight to next_href
 561
 562             def resolve_entry(candidates):
 563                 for cand in candidates:
 564                     if not isinstance(cand, dict):
 565                         continue
 566                     permalink_url = url_or_none(cand.get('permalink_url'))
 567                     if not permalink_url:
 568                         continue
 569                     return self.url_result(
 570                         permalink_url,
 571                         SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
 572                         str_or_none(cand.get('id')), cand.get('title'))
 573
 574             for e in collection:
 575                 entry = resolve_entry((e, e.get('track'), e.get('playlist')))
 576                 if entry:
 577                     entries.append(entry)
 578
 579             next_href = response.get('next_href')
 580             if not next_href:
 581                 break
 582
 583             next_href = response['next_href']
 584             parsed_next_href = compat_urlparse.urlparse(next_href)
 585             query = compat_urlparse.parse_qs(parsed_next_href.query)
 586             query.update(COMMON_QUERY)
 587
 588         return {
 589             '_type': 'playlist',
 590             'id': playlist_id,
 591             'title': playlist_title,
 592             'entries': entries,
 593         }
 594
 595
 596 class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
 597     _VALID_URL = r'''(?x)
 598                         https?://
 599                             (?:(?:www|m)\.)?soundcloud\.com/
 600                             (?P<user>[^/]+)
 601                             (?:/
 602                                 (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
 603                             )?
 604                             /?(?:[?#].*)?$
 605                     '''
 606     IE_NAME = 'soundcloud:user'
 607     _TESTS = [{
 608         'url': 'https://soundcloud.com/soft-cell-official',
 609         'info_dict': {
 610             'id': '207965082',
 611             'title': 'Soft Cell (All)',
 612         },
 613         'playlist_mincount': 28,
 614     }, {
 615         'url': 'https://soundcloud.com/soft-cell-official/tracks',
 616         'info_dict': {
 617             'id': '207965082',
 618             'title': 'Soft Cell (Tracks)',
 619         },
 620         'playlist_mincount': 27,
 621     }, {
 622         'url': 'https://soundcloud.com/soft-cell-official/albums',
 623         'info_dict': {
 624             'id': '207965082',
 625             'title': 'Soft Cell (Albums)',
 626         },
 627         'playlist_mincount': 1,
 628     }, {
 629         'url': 'https://soundcloud.com/jcv246/sets',
 630         'info_dict': {
 631             'id': '12982173',
 632             'title': 'Jordi / cv (Sets)',
 633         },
 634         'playlist_mincount': 2,
 635     }, {
 636         'url': 'https://soundcloud.com/jcv246/reposts',
 637         'info_dict': {
 638             'id': '12982173',
 639             'title': 'Jordi / cv (Reposts)',
 640         },
 641         'playlist_mincount': 6,
 642     }, {
 643         'url': 'https://soundcloud.com/clalberg/likes',
 644         'info_dict': {
 645             'id': '11817582',
 646             'title': 'clalberg (Likes)',
 647         },
 648         'playlist_mincount': 5,
 649     }, {
 650         'url': 'https://soundcloud.com/grynpyret/spotlight',
 651         'info_dict': {
 652             'id': '7098329',
 653             'title': 'Grynpyret (Spotlight)',
 654         },
 655         'playlist_mincount': 1,
 656     }]
 657
 658     _BASE_URL_MAP = {
 659         'all': 'stream/users/%s',
 660         'tracks': 'users/%s/tracks',
 661         'albums': 'users/%s/albums',
 662         'sets': 'users/%s/playlists',
 663         'reposts': 'stream/users/%s/reposts',
 664         'likes': 'users/%s/likes',
 665         'spotlight': 'users/%s/spotlight',
 666     }
 667
 668     def _real_extract(self, url):
 669         mobj = re.match(self._VALID_URL, url)
 670         uploader = mobj.group('user')
 671
 672         user = self._download_json(
 673             self._resolv_url(self._BASE_URL + uploader),
 674             uploader, 'Downloading user info')
 675
 676         resource = mobj.group('rsrc') or 'all'
 677
 678         return self._extract_playlist(
 679             self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'],
 680             str_or_none(user.get('id')),
 681             '%s (%s)' % (user['username'], resource.capitalize()))
 682
 683
 684 class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
 685     _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
 686     IE_NAME = 'soundcloud:trackstation'
 687     _TESTS = [{
 688         'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
 689         'info_dict': {
 690             'id': '286017854',
 691             'title': 'Track station: your text',
 692         },
 693         'playlist_mincount': 47,
 694     }]
 695
 696     def _real_extract(self, url):
 697         track_name = self._match_id(url)
 698
 699         track = self._download_json(self._resolv_url(url), track_name)
 700         track_id = self._search_regex(
 701             r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
 702
 703         return self._extract_playlist(
 704             self._API_V2_BASE + 'stations/%s/tracks' % track['id'],
 705             track_id, 'Track station: %s' % track['title'])
 706
 707
 708 class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
 709     _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
 710     IE_NAME = 'soundcloud:playlist'
 711     _TESTS = [{
 712         'url': 'https://api.soundcloud.com/playlists/4110309',
 713         'info_dict': {
 714             'id': '4110309',
 715             'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
 716             'description': 're:.*?TILT Brass - Bowery Poetry Club',
 717         },
 718         'playlist_count': 6,
 719     }]
 720
 721     def _real_extract(self, url):
 722         mobj = re.match(self._VALID_URL, url)
 723         playlist_id = mobj.group('id')
 724
 725         query = {
 726             'client_id': self._CLIENT_ID,
 727         }
 728         token = mobj.group('token')
 729         if token:
 730             query['secret_token'] = token
 731
 732         data = self._download_json(
 733             self._API_V2_BASE + 'playlists/' + playlist_id,
 734             playlist_id, 'Downloading playlist', query=query)
 735
 736         entries = self._extract_track_entries(data['tracks'], token)
 737
 738         return self.playlist_result(
 739             entries, playlist_id, data.get('title'), data.get('description'))
 740
 741
 742 class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
 743     IE_NAME = 'soundcloud:search'
 744     IE_DESC = 'Soundcloud search'
 745     _MAX_RESULTS = float('inf')
 746     _TESTS = [{
 747         'url': 'scsearch15:post-avant jazzcore',
 748         'info_dict': {
 749             'title': 'post-avant jazzcore',
 750         },
 751         'playlist_count': 15,
 752     }]
 753
 754     _SEARCH_KEY = 'scsearch'
 755     _MAX_RESULTS_PER_PAGE = 200
 756     _DEFAULT_RESULTS_PER_PAGE = 50
 757
 758     def _get_collection(self, endpoint, collection_id, **query):
 759         limit = min(
 760             query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
 761             self._MAX_RESULTS_PER_PAGE)
 762         query.update({
 763             'limit': limit,
 764             'client_id': self._CLIENT_ID,
 765             'linked_partitioning': 1,
 766             'offset': 0,
 767         })
 768         next_url = update_url_query(self._API_V2_BASE + endpoint, query)
 769
 770         collected_results = 0
 771
 772         for i in itertools.count(1):
 773             response = self._download_json(
 774                 next_url, collection_id, 'Downloading page {0}'.format(i),
 775                 'Unable to download API page')
 776
 777             collection = response.get('collection', [])
 778             if not collection:
 779                 break
 780
 781             collection = list(filter(bool, collection))
 782             collected_results += len(collection)
 783
 784             for item in collection:
 785                 yield self.url_result(item['uri'], SoundcloudIE.ie_key())
 786
 787             if not collection or collected_results >= limit:
 788                 break
 789
 790             next_url = response.get('next_href')
 791             if not next_url:
 792                 break
 793
 794     def _get_n_results(self, query, n):
 795         tracks = self._get_collection('search/tracks', query, limit=n, q=query)
 796         return self.playlist_result(tracks, playlist_title=query)