From 1db182ec27c3597dd8820fba62eb5390c45dbae1 Mon Sep 17 00:00:00 2001
From: master131 <1592009+master131@users.noreply.github.com>
Date: Thu, 6 Oct 2022 05:11:54 +1100
Subject: [PATCH] Update UA and enhance headers for iOS struct (#1744)

---
 instaloader/instaloadercontext.py | 101 ++++++++++++++++++++++++++++--
 1 file changed, 95 insertions(+), 6 deletions(-)

diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py
index 9cb69c1..7c05f2d 100644
--- a/instaloader/instaloadercontext.py
+++ b/instaloader/instaloadercontext.py
@@ -9,6 +9,7 @@ import sys
 import textwrap
 import time
 import urllib.parse
+import uuid
 from contextlib import contextmanager
 from datetime import datetime, timedelta
 from functools import partial
@@ -36,6 +37,34 @@ def default_user_agent() -> str:
            '(KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
 
 
+def default_iphone_headers() -> Dict[str, Any]:
+    return {'User-Agent': 'Instagram 252.1.0.19.111 (iPad13,8; iOS 15_7; en_US; en-US; ' \
+                          'scale=2.00; 2048x2732; 397943498) AppleWebKit/420+',
+            'x-ads-opt-out': '1',
+            'x-bloks-is-panorama-enabled': 'true',
+            'x-bloks-version-id': 'f4b030ed39b78b24fcc16471d1bffebceaf7d7c01800b24320cc095bda9c63bd',
+            'x-fb-client-ip': 'True',
+            'x-fb-connection-type': 'wifi',
+            'x-fb-http-engine': 'Liger',
+            'x-fb-server-cluster': 'True',
+            'x-fb': '1',
+            'x-ig-abr-connection-speed-kbps': '2',
+            'x-ig-app-id': '124024574287414',
+            'x-ig-app-locale': 'en-US',
+            'x-ig-app-startup-country': 'US',
+            'x-ig-bandwidth-speed-kbps': '0.000',
+            'x-ig-capabilities': '36r/F/8=',
+            'x-ig-connection-speed': '{}kbps'.format(random.randint(1000, 20000)),
+            'x-ig-connection-type': 'WiFi',
+            'x-ig-device-locale': 'en-US',
+            'x-ig-mapped-locale': 'en-US',
+            'x-ig-timezone-offset': str((datetime.now().astimezone().utcoffset() or timedelta(seconds=0)).seconds),
+            'x-ig-www-claim': '0',
+            'x-pigeon-session-id': str(uuid.uuid4()),
+            'x-tigon-is-retry': 'False',
+            'x-whatsapp': '0'}
+
+
 class InstaloaderContext:
     """Class providing methods for (error) logging and low-level communication with Instagram.
 
@@ -61,6 +90,7 @@ class InstaloaderContext:
         self.request_timeout = request_timeout
         self._session = self.get_anonymous_session()
         self.username = None
+        self.user_id = None
         self.sleep = sleep
         self.quiet = quiet
         self.max_connection_attempts = max_connection_attempts
@@ -68,6 +98,7 @@ class InstaloaderContext:
         self._root_rhx_gis = None
         self.two_factor_auth_pending = None
         self.iphone_support = iphone_support
+        self.iphone_headers = default_iphone_headers()
 
         # error log, filled with error() and printed at the end of Instaloader.main()
         self.error_log: List[str] = []
@@ -87,14 +118,20 @@ class InstaloaderContext:
     def anonymous_copy(self):
         session = self._session
         username = self.username
+        user_id = self.user_id
+        iphone_headers = self.iphone_headers
         self._session = self.get_anonymous_session()
         self.username = None
+        self.user_id = None
+        self.iphone_headers = default_iphone_headers()
         try:
             yield self
         finally:
             self._session.close()
             self.username = username
             self._session = session
+            self.user_id = user_id
+            self.iphone_headers = iphone_headers
 
     @property
     def is_logged_in(self) -> bool:
@@ -273,6 +310,7 @@ class InstaloaderContext:
         session.headers.update({'X-CSRFToken': login.cookies['csrftoken']})
         self._session = session
         self.username = user
+        self.user_id = resp_json['userId']
 
     def two_factor_login(self, two_factor_code):
         """Second step of login if 2FA is enabled.
@@ -306,7 +344,8 @@ class InstaloaderContext:
             time.sleep(min(random.expovariate(0.6), 15.0))
 
     def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram.com',
-                 session: Optional[requests.Session] = None, _attempt=1) -> Dict[str, Any]:
+                 session: Optional[requests.Session] = None, _attempt=1,
+                 response_headers: Dict[str, Any] = None) -> Dict[str, Any]:
         """JSON request to Instagram.
 
         :param path: URL, relative to the given domain which defaults to www.instagram.com/
@@ -349,6 +388,9 @@ class InstaloaderContext:
                                     params=params, allow_redirects=False)
                 else:
                     break
+            if response_headers is not None:
+                response_headers.clear()
+                response_headers.update(resp.headers)
             if resp.status_code == 400:
                 raise QueryReturnedBadRequestException("400 Bad Request")
             if resp.status_code == 404:
@@ -399,7 +441,8 @@ class InstaloaderContext:
                         self._rate_controller.handle_429('iphone')
                     if is_other_query:
                         self._rate_controller.handle_429('other')
-                return self.get_json(path=path, params=params, host=host, session=sess, _attempt=_attempt + 1)
+                return self.get_json(path=path, params=params, host=host, session=sess, _attempt=_attempt + 1,
+                                     response_headers=response_headers)
             except KeyboardInterrupt:
                 self.error("[skipped by user]", repeat_at_end=False)
                 raise ConnectionException(error_string) from err
@@ -489,11 +532,57 @@ class InstaloaderContext:
 
         .. versionadded:: 4.2.1"""
         with copy_session(self._session, self.request_timeout) as tempsession:
-            tempsession.headers['User-Agent'] = 'Instagram 146.0.0.27.125 (iPhone12,1; iOS 13_3; en_US; en-US; ' \
-                                                'scale=2.00; 1656x3584; 190542906)'
-            for header in ['Host', 'Origin', 'X-Instagram-AJAX', 'X-Requested-With']:
+            # Set headers to simulate an API request from iPad
+            tempsession.headers['ig-intended-user-id'] = str(self.user_id)
+            tempsession.headers['x-pigeon-rawclienttime'] = '{:.6f}'.format(time.time())
+
+            # Add headers obtained from previous iPad request
+            tempsession.headers.update(self.iphone_headers)
+
+            # Extract key information from cookies if we haven't got it already from a previous request
+            header_cookies_mapping = {'x-mid': 'mid',
+                                     'ig-u-ds-user-id': 'ds_user_id',
+                                     'x-ig-device-id': 'ig_did',
+                                     'x-ig-family-device-id': 'ig_did',
+                                     'family_device_id': 'ig_did'}
+
+            # Map the cookie value to the matching HTTP request header
+            cookies = tempsession.cookies.get_dict().copy()
+            for key, value in header_cookies_mapping.items():
+                if value in cookies:
+                    if key not in tempsession.headers:
+                        tempsession.headers[key] = cookies[value]
+                    else:
+                        # Remove the cookie value if it's already specified as a header
+                        tempsession.cookies.pop(value, None)
+
+            # Edge case for ig-u-rur header due to special string encoding in cookie
+            if 'rur' in cookies:
+                if 'ig-u-rur' not in tempsession.headers:
+                    tempsession.headers['ig-u-rur'] = cookies['rur'].strip('\"').encode('utf-8') \
+                                                                    .decode('unicode_escape')
+                else:
+                    tempsession.cookies.pop('rur', None)
+
+            # Remove headers specific to Desktop version
+            for header in ['Host', 'Origin', 'X-Instagram-AJAX', 'X-Requested-With', 'Referer']:
                 tempsession.headers.pop(header, None)
-            return self.get_json(path, params, 'i.instagram.com', tempsession)
+
+            # No need for cookies if we have a bearer token
+            if 'authorization' in tempsession.headers:
+                tempsession.cookies.clear()
+
+            response_headers = dict()    # type: Dict[str, Any]
+            response = self.get_json(path, params, 'i.instagram.com', tempsession, response_headers=response_headers)
+
+            # Extract the ig-set-* headers and use them in the next request
+            for key, value in response_headers.items():
+                if key.startswith('ig-set-'):
+                    self.iphone_headers[key.replace('ig-set-', '')] = value
+                elif key.startswith('x-ig-set-'):
+                    self.iphone_headers[key.replace('x-ig-set-', 'x-ig-')] = value
+
+            return response
 
     def write_raw(self, resp: Union[bytes, requests.Response], filename: str) -> None:
         """Write raw response data into a file.