diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index ffb326d..457e58f 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -3,7 +3,6 @@ import json import os import pickle import random -import re import shutil import sys import textwrap @@ -256,18 +255,24 @@ class InstaloaderContext: # Override default timeout behavior. # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427 session.request = partial(session.request, timeout=self.request_timeout) # type: ignore - csrf_json = self.get_json('accounts/login/', {}, session=session) - csrf_token = csrf_json['config']['csrf_token'] + + self.do_sleep() + # Make a request to Instagram's root URL, which will set the session's csrftoken cookie + # Not using self.get_json() here, because we need to access the cookie + session.get('https://www.instagram.com/') + # Add session's csrftoken cookie to session headers + csrf_token = session.cookies.get_dict()['csrftoken'] session.headers.update({'X-CSRFToken': csrf_token}) - # Not using self.get_json() here, because we need to access csrftoken cookie + self.do_sleep() # Workaround credits to pgrimaud. # See: https://github.com/pgrimaud/instagram-user-feed/commit/96ad4cf54d1ad331b337f325c73e664999a6d066 enc_password = '#PWD_INSTAGRAM_BROWSER:0:{}:{}'.format(int(datetime.now().timestamp()), passwd) - login = session.post('https://www.instagram.com/accounts/login/ajax/', + login = session.post('https://www.instagram.com/api/v1/web/accounts/login/ajax/', data={'enc_password': enc_password, 'username': user}, allow_redirects=True) try: resp_json = login.json() + except json.decoder.JSONDecodeError as err: raise ConnectionException( "Login error: JSON decode fail, {} - {}.".format(login.status_code, login.reason) @@ -403,16 +408,6 @@ class InstaloaderContext: raise TooManyRequestsException("429 Too Many Requests") if resp.status_code != 200: raise ConnectionException("HTTP error code {}.".format(resp.status_code)) - is_html_query = not is_graphql_query and not "__a" in params and host == "www.instagram.com" - if is_html_query: - # Extract JSON from HTML response - match = re.search('(?<={"raw":").*?(?