parent
74d5e35eb8
commit
73ec884ea4
@ -3,6 +3,7 @@
|
||||
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
|
||||
import ast
|
||||
import getpass
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import pickle
|
||||
@ -201,6 +202,7 @@ class Post:
|
||||
self._profile = profile
|
||||
self._profile_id = profile_id
|
||||
self._full_metadata_dict = None
|
||||
self._rhx_gis = None
|
||||
|
||||
@classmethod
|
||||
def from_shortcode(cls, instaloader: 'Instaloader', shortcode: str):
|
||||
@ -239,11 +241,9 @@ class Post:
|
||||
@property
|
||||
def _full_metadata(self) -> Dict[str, Any]:
|
||||
if not self._full_metadata_dict:
|
||||
pic_json = self._instaloader.get_json("p/{0}/".format(self.shortcode), params={'__a': 1})
|
||||
if "graphql" in pic_json:
|
||||
self._full_metadata_dict = pic_json["graphql"]["shortcode_media"]
|
||||
else:
|
||||
self._full_metadata_dict = pic_json["media"]
|
||||
pic_json = self._instaloader.get_json("p/{0}/".format(self.shortcode), params={})
|
||||
self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
|
||||
self._rhx_gis = pic_json['rhx_gis']
|
||||
return self._full_metadata_dict
|
||||
|
||||
def _field(self, *keys) -> Any:
|
||||
@ -387,7 +387,8 @@ class Post:
|
||||
return
|
||||
yield from self._instaloader.graphql_node_list(17852405266163336, {'shortcode': self.shortcode},
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
lambda d: d['data']['shortcode_media']['edge_media_to_comment'])
|
||||
lambda d: d['data']['shortcode_media']['edge_media_to_comment'],
|
||||
rhx_gis=self._rhx_gis)
|
||||
|
||||
def get_likes(self) -> Iterator[Dict[str, Any]]:
|
||||
"""Iterate over all likes of the post.
|
||||
@ -405,7 +406,8 @@ class Post:
|
||||
return
|
||||
yield from self._instaloader.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
lambda d: d['data']['shortcode_media']['edge_liked_by'])
|
||||
lambda d: d['data']['shortcode_media']['edge_liked_by'],
|
||||
rhx_gis=self._rhx_gis)
|
||||
|
||||
def get_location(self) -> Optional[Dict[str, str]]:
|
||||
"""If the Post has a location, returns a dictionary with fields 'lat' and 'lng'."""
|
||||
@ -602,7 +604,7 @@ class Instaloader:
|
||||
:raises QueryReturnedNotFoundException: When the server responds with a 404.
|
||||
:raises ConnectionException: When query repeatedly failed.
|
||||
"""
|
||||
def graphql_query_waittime(query_id: int, untracked_queries: bool = False) -> int:
|
||||
def graphql_query_waittime(query_id: Union[int, str], untracked_queries: bool = False) -> int:
|
||||
sliding_window = 660
|
||||
timestamps = self.previous_queries.get(query_id)
|
||||
if not timestamps:
|
||||
@ -613,9 +615,9 @@ class Instaloader:
|
||||
if len(timestamps) < 100 and not untracked_queries:
|
||||
return 0
|
||||
return round(min(timestamps) + sliding_window - current_time) + 6
|
||||
is_graphql_query = 'query_id' in params and 'graphql/query' in path
|
||||
is_graphql_query = 'graphql/query' in path
|
||||
if is_graphql_query:
|
||||
query_id = params['query_id']
|
||||
query_id = params['query_id'] if 'query_id' in params else params['query_hash']
|
||||
waittime = graphql_query_waittime(query_id)
|
||||
if waittime > 0:
|
||||
self._log('\nToo many queries in the last time. Need to wait {} seconds.'.format(waittime))
|
||||
@ -635,7 +637,13 @@ class Instaloader:
|
||||
raise TooManyRequests("429 - Too Many Requests")
|
||||
if resp.status_code != 200:
|
||||
raise ConnectionException("HTTP error code {}.".format(resp.status_code))
|
||||
resp_json = resp.json()
|
||||
if not is_graphql_query and not "__a" in params and host == "www.instagram.com":
|
||||
match = re.search(r'window\._sharedData = (.*);</script>', resp.text)
|
||||
if match is None:
|
||||
raise ConnectionException("Could not find \"window._sharedData\" in html response.")
|
||||
return json.loads(match.group(1))
|
||||
else:
|
||||
resp_json = resp.json()
|
||||
if 'status' in resp_json and resp_json['status'] != "ok":
|
||||
if 'message' in resp_json:
|
||||
raise ConnectionException("Returned \"{}\" status, message \"{}\".".format(resp_json['status'],
|
||||
@ -695,7 +703,7 @@ class Instaloader:
|
||||
return session
|
||||
|
||||
def graphql_query(self, query_identifier: Union[int, str], variables: Dict[str, Any],
|
||||
referer: Optional[str] = None) -> Dict[str, Any]:
|
||||
referer: Optional[str] = None, rhx_gis: Optional[str] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Do a GraphQL Query.
|
||||
|
||||
@ -713,9 +721,18 @@ class Instaloader:
|
||||
tmpsession.headers['accept'] = '*/*'
|
||||
if referer is not None:
|
||||
tmpsession.headers['referer'] = urllib.parse.quote(referer)
|
||||
|
||||
variables_json = json.dumps(variables, separators=(',', ':'))
|
||||
|
||||
if rhx_gis:
|
||||
values = "{}:{}:{}:{}".format(rhx_gis, tmpsession.cookies['csrftoken'], self.user_agent, variables_json)
|
||||
x_instagram_gis = hashlib.md5(values.encode()).hexdigest()
|
||||
tmpsession.cookies.set('ig_pr', '2')
|
||||
tmpsession.headers['x-instagram-gis'] = x_instagram_gis
|
||||
|
||||
resp_json = self.get_json('graphql/query',
|
||||
params={'query_id' if isinstance(query_identifier, int) else 'query_hash': query_identifier,
|
||||
'variables': json.dumps(variables, separators=(',', ':'))},
|
||||
'variables': variables_json},
|
||||
session=tmpsession)
|
||||
if 'status' not in resp_json:
|
||||
self.error("GraphQL response did not contain a \"status\" field.")
|
||||
@ -740,20 +757,21 @@ class Instaloader:
|
||||
def get_id_by_username(self, profile: str) -> int:
|
||||
"""Each Instagram profile has its own unique ID which stays unmodified even if a user changes
|
||||
his/her username. To get said ID, given the profile's name, you may call this function."""
|
||||
return int(self.get_profile_metadata(profile)['user']['id'])
|
||||
return int(self.get_profile_metadata(profile)[0]['user']['id'])
|
||||
|
||||
def graphql_node_list(self, query_identifier: Union[int, str], query_variables: Dict[str, Any],
|
||||
query_referer: Optional[str],
|
||||
edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
|
||||
edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]],
|
||||
rhx_gis: Optional[str] = None) -> Iterator[Dict[str, Any]]:
|
||||
"""Retrieve a list of GraphQL nodes."""
|
||||
query_variables['first'] = Instaloader.GRAPHQL_PAGE_LENGTH
|
||||
data = self.graphql_query(query_identifier, query_variables, query_referer)
|
||||
data = self.graphql_query(query_identifier, query_variables, query_referer, rhx_gis)
|
||||
while True:
|
||||
edge_struct = edge_extractor(data)
|
||||
yield from [edge['node'] for edge in edge_struct['edges']]
|
||||
if edge_struct['page_info']['has_next_page']:
|
||||
query_variables['after'] = edge_struct['page_info']['end_cursor']
|
||||
data = self.graphql_query(query_identifier, query_variables, query_referer)
|
||||
data = self.graphql_query(query_identifier, query_variables, query_referer, rhx_gis)
|
||||
else:
|
||||
break
|
||||
|
||||
@ -1257,7 +1275,7 @@ class Instaloader:
|
||||
|
||||
if not self.is_logged_in:
|
||||
return
|
||||
data = self.get_profile_metadata(self.username)
|
||||
data, full_metadata = self.get_profile_metadata(self.username)
|
||||
user_id = data["user"]["id"]
|
||||
|
||||
while True:
|
||||
@ -1277,7 +1295,8 @@ class Instaloader:
|
||||
break
|
||||
data = self.graphql_query("f883d95537fbcd400f466f63d42bd8a1",
|
||||
{'id': user_id, 'first': Instaloader.GRAPHQL_PAGE_LENGTH,
|
||||
'after': saved_media["page_info"]["end_cursor"]})['data']
|
||||
'after': saved_media["page_info"]["end_cursor"]},
|
||||
rhx_gis=full_metadata['rhx_gis'])['data']
|
||||
|
||||
def download_saved_posts(self, max_count: int = None, fast_update: bool = False,
|
||||
filter_func: Optional[Callable[[Post], bool]] = None) -> None:
|
||||
@ -1403,15 +1422,15 @@ class Instaloader:
|
||||
return profile, profile_id
|
||||
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
|
||||
|
||||
def get_profile_metadata(self, profile_name: str) -> Dict[str, Any]:
|
||||
def get_profile_metadata(self, profile_name: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
|
||||
"""Retrieves a profile's metadata, for use with e.g. :meth:`get_profile_posts` and :meth:`check_profile_id`."""
|
||||
try:
|
||||
metadata = self.get_json('{}/'.format(profile_name), params={'__a': 1})
|
||||
return metadata['graphql'] if 'graphql' in metadata else metadata
|
||||
metadata = self.get_json('{}/'.format(profile_name), params={})
|
||||
return metadata['entry_data']['ProfilePage'][0]['graphql'], metadata
|
||||
except QueryReturnedNotFoundException:
|
||||
raise ProfileNotExistsException('Profile {} does not exist.'.format(profile_name))
|
||||
|
||||
def get_profile_posts(self, profile_metadata: Dict[str, Any]) -> Iterator[Post]:
|
||||
def get_profile_posts(self, profile_metadata: Dict[str, Any], rhx_gis: str) -> Iterator[Post]:
|
||||
"""Retrieve all posts from a profile."""
|
||||
profile_name = profile_metadata['user']['username']
|
||||
profile_id = int(profile_metadata['user']['id'])
|
||||
@ -1432,7 +1451,7 @@ class Instaloader:
|
||||
data = self.graphql_query(17888483320059182, {'id': profile_metadata['user']['id'],
|
||||
'first': Instaloader.GRAPHQL_PAGE_LENGTH,
|
||||
'after': end_cursor},
|
||||
'https://www.instagram.com/{0}/'.format(profile_name))
|
||||
referer='https://www.instagram.com/{0}/'.format(profile_name), rhx_gis=rhx_gis)
|
||||
media = data['data']['user']['edge_owner_to_timeline_media']
|
||||
yield from (Post(self, edge['node'], profile=profile_name, profile_id=profile_id)
|
||||
for edge in media['edges'])
|
||||
@ -1452,14 +1471,14 @@ class Instaloader:
|
||||
with suppress(ProfileNotExistsException):
|
||||
# ProfileNotExistsException is raised again later in check_profile_id() when we search the profile, so we
|
||||
# must suppress it here.
|
||||
profile_metadata = self.get_profile_metadata(name)
|
||||
profile_metadata, full_metadata = self.get_profile_metadata(name)
|
||||
|
||||
# check if profile does exist or name has changed since last download
|
||||
# and update name and json data if necessary
|
||||
name_updated, profile_id = self.check_profile_id(name, profile_metadata)
|
||||
if name_updated != name:
|
||||
name = name_updated
|
||||
profile_metadata = self.get_profile_metadata(name)
|
||||
profile_metadata, full_metadata = self.get_profile_metadata(name)
|
||||
|
||||
# Download profile picture
|
||||
if profile_pic or profile_pic_only:
|
||||
@ -1494,7 +1513,7 @@ class Instaloader:
|
||||
else:
|
||||
totalcount = profile_metadata["user"]["edge_owner_to_timeline_media"]["count"]
|
||||
count = 1
|
||||
for post in self.get_profile_posts(profile_metadata):
|
||||
for post in self.get_profile_posts(profile_metadata, rhx_gis=full_metadata['rhx_gis']):
|
||||
self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
||||
count += 1
|
||||
if filter_func is not None and not filter_func(post):
|
||||
|
Loading…
Reference in New Issue
Block a user