Bypass IP-based redirects to /accounts/login (#727)
* Bypass IP-based redirects to /accounts/login. There are two main changes: (1) For users, we request /{username}/feed/ instead of /{username}/. For some reason, this completely bypasses the login redirect. This page doesn't work in a browser while the IP is blocked, but fortunately all the data we need is simply present in the HTML page. (2) For posts, we change from using the /p/ page to using the GraphQL endpoint for the same data, which is still subject to GraphQL rate limits but is not subject to login redirects. The data is identical between the two sources, apart from the object keys being sorted differently and rhx_gis being missing from the GraphQL response. Yes, this now unblocks access from VPNs, Tor, cloud servers, etc. * Apply requested patch to fix comments. * Remove rhx_gis from Post and Profile. Co-authored-by: Alexander Graf <17130992+aandergr@users.noreply.github.com>
This commit is contained in:
parent
e21d34124d
commit
db63e5c7e5
@ -69,7 +69,6 @@ class Post:
|
||||
self._node = node
|
||||
self._owner_profile = owner_profile
|
||||
self._full_metadata_dict = None # type: Optional[Dict[str, Any]]
|
||||
self._rhx_gis_str = None # type: Optional[str]
|
||||
self._location = None # type: Optional[PostLocation]
|
||||
self._iphone_struct_ = None
|
||||
if 'iphone_struct' in node:
|
||||
@ -142,9 +141,11 @@ class Post:
|
||||
|
||||
def _obtain_metadata(self):
|
||||
if not self._full_metadata_dict:
|
||||
pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={})
|
||||
self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
|
||||
self._rhx_gis_str = pic_json.get('rhx_gis')
|
||||
pic_json = self._context.graphql_query(
|
||||
'2b0673e0dc4580674a88d426fe00ea90',
|
||||
{'shortcode': self.shortcode}
|
||||
)
|
||||
self._full_metadata_dict = pic_json['data']['shortcode_media']
|
||||
if self._full_metadata_dict is None:
|
||||
# issue #449
|
||||
self._context.error("Fetching Post metadata failed (issue #449). "
|
||||
@ -161,11 +162,6 @@ class Post:
|
||||
assert self._full_metadata_dict is not None
|
||||
return self._full_metadata_dict
|
||||
|
||||
@property
|
||||
def _rhx_gis(self) -> Optional[str]:
|
||||
self._obtain_metadata()
|
||||
return self._rhx_gis_str
|
||||
|
||||
@property
|
||||
def _iphone_struct(self) -> Dict[str, Any]:
|
||||
if not self._context.is_logged_in:
|
||||
@ -392,7 +388,7 @@ class Post:
|
||||
created_at_utc=datetime.utcfromtimestamp(node['created_at']),
|
||||
text=node['text'],
|
||||
owner=Profile(self._context, node['owner']),
|
||||
likes_count=node['edge_liked_by']['count'])
|
||||
likes_count=node.get('edge_liked_by', {}).get('count', 0))
|
||||
|
||||
def _postcommentanswers(node):
|
||||
if 'edge_threaded_comments' not in node:
|
||||
@ -418,14 +414,9 @@ class Post:
|
||||
if self.comments == 0:
|
||||
# Avoid doing additional requests if there are no comments
|
||||
return
|
||||
try:
|
||||
comment_edges = self._field('edge_media_to_parent_comment', 'edges')
|
||||
answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
|
||||
threaded_comments_available = True
|
||||
except KeyError:
|
||||
comment_edges = self._field('edge_media_to_comment', 'edges')
|
||||
answers_count = 0
|
||||
threaded_comments_available = False
|
||||
|
||||
comment_edges = self._field('edge_media_to_comment', 'edges')
|
||||
answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
|
||||
|
||||
if self.comments == len(comment_edges) + answers_count:
|
||||
# If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
|
||||
@ -433,14 +424,10 @@ class Post:
|
||||
return
|
||||
yield from (_postcomment(node) for node in
|
||||
self._context.graphql_node_list(
|
||||
"97b41c52301f77ce508f55e66d17620e" if threaded_comments_available
|
||||
else "f0986789a5c5d17c2400faebf16efd0d",
|
||||
"97b41c52301f77ce508f55e66d17620e",
|
||||
{'shortcode': self.shortcode},
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
lambda d:
|
||||
d['data']['shortcode_media'][
|
||||
'edge_media_to_parent_comment' if threaded_comments_available else 'edge_media_to_comment'],
|
||||
self._rhx_gis))
|
||||
lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment']))
|
||||
|
||||
def get_likes(self) -> Iterator['Profile']:
|
||||
"""Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded."""
|
||||
@ -455,8 +442,7 @@ class Post:
|
||||
yield from (Profile(self._context, node) for node in
|
||||
self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
lambda d: d['data']['shortcode_media']['edge_liked_by'],
|
||||
self._rhx_gis))
|
||||
lambda d: d['data']['shortcode_media']['edge_liked_by']))
|
||||
|
||||
@property
|
||||
def is_sponsored(self) -> bool:
|
||||
@ -537,7 +523,6 @@ class Profile:
|
||||
self._has_public_story = None # type: Optional[bool]
|
||||
self._node = node
|
||||
self._has_full_metadata = False
|
||||
self._rhx_gis = None
|
||||
self._iphone_struct_ = None
|
||||
if 'iphone_struct' in node:
|
||||
# if loaded from JSON with load_structure_from_file()
|
||||
@ -599,10 +584,9 @@ class Profile:
|
||||
def _obtain_metadata(self):
|
||||
try:
|
||||
if not self._has_full_metadata:
|
||||
metadata = self._context.get_json('{}/'.format(self.username), params={})
|
||||
metadata = self._context.get_json('{}/feed/'.format(self.username), params={})
|
||||
self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
|
||||
self._has_full_metadata = True
|
||||
self._rhx_gis = metadata.get('rhx_gis')
|
||||
except (QueryReturnedNotFoundException, KeyError) as err:
|
||||
top_search_results = TopSearchResults(self._context, self.username)
|
||||
similar_profiles = [profile.username for profile in top_search_results.get_profiles()]
|
||||
@ -735,8 +719,7 @@ class Profile:
|
||||
'include_reel': False, 'include_suggested_users': False,
|
||||
'include_logged_out_extras': True,
|
||||
'include_highlight_reels': False},
|
||||
'https://www.instagram.com/{}/'.format(self.username),
|
||||
self._rhx_gis)
|
||||
'https://www.instagram.com/{}/'.format(self.username))
|
||||
self._has_public_story = data['data']['user']['has_public_story']
|
||||
assert self._has_public_story is not None
|
||||
return self._has_public_story
|
||||
@ -795,8 +778,7 @@ class Profile:
|
||||
{'id': self.userid},
|
||||
'https://www.instagram.com/{0}/'.format(self.username),
|
||||
lambda d: d['data']['user']['edge_owner_to_timeline_media'],
|
||||
self._rhx_gis,
|
||||
self._metadata('edge_owner_to_timeline_media')))
|
||||
first_data=self._metadata('edge_owner_to_timeline_media')))
|
||||
|
||||
def get_saved_posts(self) -> Iterator[Post]:
|
||||
"""Get Posts that are marked as saved by the user."""
|
||||
@ -810,8 +792,7 @@ class Profile:
|
||||
{'id': self.userid},
|
||||
'https://www.instagram.com/{0}/'.format(self.username),
|
||||
lambda d: d['data']['user']['edge_saved_media'],
|
||||
self._rhx_gis,
|
||||
self._metadata('edge_saved_media')))
|
||||
first_data=self._metadata('edge_saved_media')))
|
||||
|
||||
def get_tagged_posts(self) -> Iterator[Post]:
|
||||
"""Retrieve all posts where a profile is tagged.
|
||||
@ -822,8 +803,7 @@ class Profile:
|
||||
self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7",
|
||||
{'id': self.userid},
|
||||
'https://www.instagram.com/{0}/'.format(self.username),
|
||||
lambda d: d['data']['user']['edge_user_to_photos_of_you'],
|
||||
self._rhx_gis))
|
||||
lambda d: d['data']['user']['edge_user_to_photos_of_you']))
|
||||
|
||||
def get_igtv_posts(self) -> Iterator[Post]:
|
||||
"""Retrieve all IGTV posts.
|
||||
@ -835,8 +815,7 @@ class Profile:
|
||||
{'id': self.userid},
|
||||
'https://www.instagram.com/{0}/channel/'.format(self.username),
|
||||
lambda d: d['data']['user']['edge_felix_video_timeline'],
|
||||
self._rhx_gis,
|
||||
self._metadata('edge_felix_video_timeline')))
|
||||
first_data=self._metadata('edge_felix_video_timeline')))
|
||||
|
||||
def get_followers(self) -> Iterator['Profile']:
|
||||
"""
|
||||
@ -850,8 +829,7 @@ class Profile:
|
||||
self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a",
|
||||
{'id': str(self.userid)},
|
||||
'https://www.instagram.com/' + self.username + '/',
|
||||
lambda d: d['data']['user']['edge_followed_by'],
|
||||
self._rhx_gis))
|
||||
lambda d: d['data']['user']['edge_followed_by']))
|
||||
|
||||
def get_followees(self) -> Iterator['Profile']:
|
||||
"""
|
||||
@ -865,8 +843,7 @@ class Profile:
|
||||
self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f",
|
||||
{'id': str(self.userid)},
|
||||
'https://www.instagram.com/' + self.username + '/',
|
||||
lambda d: d['data']['user']['edge_follow'],
|
||||
self._rhx_gis))
|
||||
lambda d: d['data']['user']['edge_follow']))
|
||||
|
||||
def get_similar_accounts(self) -> Iterator['Profile']:
|
||||
"""
|
||||
@ -881,8 +858,8 @@ class Profile:
|
||||
yield from (Profile(self._context, edge["node"]) for edge in
|
||||
self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7",
|
||||
{"user_id": str(self.userid), "include_chaining": True},
|
||||
"https://www.instagram.com/{0}/".format(self.username),
|
||||
self._rhx_gis)["data"]["user"]["edge_chaining"]["edges"])
|
||||
"https://www.instagram.com/{0}/"
|
||||
.format(self.username))["data"]["user"]["edge_chaining"]["edges"])
|
||||
|
||||
|
||||
class StoryItem:
|
||||
|
Loading…
Reference in New Issue
Block a user