]> jfr.im git - yt-dlp.git/commitdiff
[Instagram] Add IOS URL support (#1560)
author u-spec-png <redacted>
Fri, 5 Nov 2021 21:31:34 +0000 (21:31 +0000)
committer GitHub <redacted>
Fri, 5 Nov 2021 21:31:34 +0000 (03:01 +0530)
Authored by: u-spec-png

yt_dlp/extractor/extractors.py
yt_dlp/extractor/instagram.py

index 9f818a12f150c81c6d95f494fd714617e51926e0..e984f51b5e73fff97bc90dc65bdebe1e3aac1647 100644 (file)
 from .infoq import InfoQIE
 from .instagram import (
     InstagramIE,
+    InstagramIOSIE,
     InstagramUserIE,
     InstagramTagIE,
 )
index c4036d096c62f8a1a8e13f8c06fd4a7eae7aa806..4694c9a33bbe935a373c9970c341af34d0281d62 100644 (file)
@@ -73,6 +73,48 @@ def _real_initialize(self):
         self._login()
 
 
+class InstagramIOSIE(InfoExtractor):  # matches instagram://media?id=... URLs and defers to the 'Instagram' extractor
+    _VALID_URL = r'instagram://media\?id=(?P<id>[\d_]+)'
+    _TESTS = [{
+        'url': 'instagram://media?id=482584233761418119',
+        'md5': '0d2da106a9d2631273e192b372806516',
+        'info_dict': {
+            'id': 'aye83DjauH',
+            'ext': 'mp4',
+            'title': 'Video by naomipq',
+            'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 0,
+            'timestamp': 1371748545,
+            'upload_date': '20130620',
+            'uploader_id': 'naomipq',
+            'uploader': 'B E A U T Y  F O R  A S H E S',
+            'like_count': int,
+            'comment_count': int,
+            'comments': list,
+        },
+        'add_ie': ['Instagram']
+    }]
+
+    def _get_id(self, id):
+        """Return the shortened ID for media ID `id`: the integer before any '_' written in base 64 using the `chrs` alphabet. Source: https://stackoverflow.com/questions/24437823/getting-instagram-post-url-from-media-id"""
+        chrs = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'  # 64 symbols; a symbol's index is its digit value
+        media_id = int(id.split('_')[0])  # only the digits before the first '_' are used; raises ValueError if that part is empty
+        shortened_id = ''
+        while media_id > 0:  # a media_id of 0 never enters the loop, so the result is ''
+            r = media_id % 64  # value of the current lowest-order base-64 digit
+            media_id = (media_id - r) // 64
+            shortened_id = chrs[r] + shortened_id  # prepend so the most significant digit ends up first
+        return shortened_id
+
+    def _real_extract(self, url):
+        return {
+            '_type': 'url_transparent',
+            'url': f'http://instagram.com/tv/{self._get_id(self._match_id(url))}/',  # web URL built from the converted 'id' group
+            'ie_key': 'Instagram',
+        }
+
+
 class InstagramIE(InstagramBaseIE):
     _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
     _TESTS = [{
@@ -348,7 +390,6 @@ def get_count(keys, kind):
 
 
 class InstagramPlaylistBaseIE(InstagramBaseIE):
-
     _gis_tmpl = None  # used to cache GIS request type
 
     def _parse_graphql(self, webpage, item_id):