Major code cleanup
Remove many code duplications, merely by using more pythonic idioms. Use GraphQL more often. Better cope with errors: All requests can be retried; failed requests do not cause program termination; all error strings are repeated to the user at the end of execution. download_post() (formerly download_node()) does not repeat the node metadata request (before this commit, this request was executed up to three times).
This commit is contained in:
parent
5d83a4ccf6
commit
58882f508e
14
README.rst
14
README.rst
@ -235,15 +235,13 @@ For example, to get a list of all followees and a list of all followers of a pro
|
|||||||
loader.interactive_login(USERNAME)
|
loader.interactive_login(USERNAME)
|
||||||
|
|
||||||
# Retrieve followees
|
# Retrieve followees
|
||||||
followees = loader.get_followees(PROFILE)
|
|
||||||
print(PROFILE + " follows these profiles:")
|
print(PROFILE + " follows these profiles:")
|
||||||
for f in followees:
|
for f in loader.get_followees(PROFILE):
|
||||||
print("\t%s\t%s" % (f['username'], f['full_name']))
|
print("\t%s\t%s" % (f['username'], f['full_name']))
|
||||||
|
|
||||||
# Retrieve followers
|
# Retrieve followers
|
||||||
followers = loader.get_followers(PROFILE)
|
|
||||||
print("Followers of " + PROFILE + ":")
|
print("Followers of " + PROFILE + ":")
|
||||||
for f in followers:
|
for f in loader.get_followers(PROFILE):
|
||||||
print("\t%s\t%s" % (f['username'], f['full_name']))
|
print("\t%s\t%s" % (f['username'], f['full_name']))
|
||||||
|
|
||||||
Then, you may download all pictures of all followees with
|
Then, you may download all pictures of all followees with
|
||||||
@ -252,7 +250,7 @@ Then, you may download all pictures of all followees with
|
|||||||
|
|
||||||
for f in followees:
|
for f in followees:
|
||||||
try:
|
try:
|
||||||
loader.download(f['username'])
|
loader.download_profile(f['username'])
|
||||||
except instaloader.NonfatalException:
|
except instaloader.NonfatalException:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -260,9 +258,11 @@ You could also download your last 20 liked pics with
|
|||||||
|
|
||||||
.. code:: python
|
.. code:: python
|
||||||
|
|
||||||
loader.download_feed_pics(max_count=20, fast_update=True,
|
loader.download_feed_posts(max_count=20, fast_update=True,
|
||||||
filter_func=lambda node:
|
filter_func=lambda node:
|
||||||
not node["likes"]["viewer_has_liked"] if "likes" in node else not node["viewer_has_liked"])
|
not node["likes"]["viewer_has_liked"]
|
||||||
|
if "likes" in node else
|
||||||
|
not node["viewer_has_liked"])
|
||||||
|
|
||||||
To download the last 20 pictures with hashtag #cat, do
|
To download the last 20 pictures with hashtag #cat, do
|
||||||
|
|
||||||
|
604
instaloader.py
604
instaloader.py
@ -15,9 +15,10 @@ import tempfile
|
|||||||
import time
|
import time
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from base64 import b64decode, b64encode
|
from base64 import b64decode, b64encode
|
||||||
|
from contextlib import contextmanager, suppress
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import requests.utils
|
import requests.utils
|
||||||
@ -52,11 +53,11 @@ class NonfatalException(InstaloaderException):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class ProfileNotExistsException(NonfatalException):
|
class QueryReturnedNotFoundException(InstaloaderException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class ProfileAccessDeniedException(NonfatalException):
|
class ProfileNotExistsException(NonfatalException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@ -88,7 +89,7 @@ class BadCredentialsException(InstaloaderException):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class ConnectionException(InstaloaderException):
|
class ConnectionException(NonfatalException):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@ -149,17 +150,41 @@ class Instaloader:
|
|||||||
self.dirname_pattern = dirname_pattern if dirname_pattern is not None else '{target}'
|
self.dirname_pattern = dirname_pattern if dirname_pattern is not None else '{target}'
|
||||||
self.filename_pattern = filename_pattern.replace('{date}', '{date:%Y-%m-%d_%H-%M-%S}') \
|
self.filename_pattern = filename_pattern.replace('{date}', '{date:%Y-%m-%d_%H-%M-%S}') \
|
||||||
if filename_pattern is not None else '{date:%Y-%m-%d_%H-%M-%S}'
|
if filename_pattern is not None else '{date:%Y-%m-%d_%H-%M-%S}'
|
||||||
|
self.error_log = []
|
||||||
|
|
||||||
def _log(self, *msg, sep='', end='\n', flush=False):
|
def _log(self, *msg, sep='', end='\n', flush=False):
|
||||||
|
"""Log a message to stdout that can be suppressed with --quiet."""
|
||||||
if not self.quiet:
|
if not self.quiet:
|
||||||
print(*msg, sep=sep, end=end, flush=flush)
|
print(*msg, sep=sep, end=end, flush=flush)
|
||||||
|
|
||||||
|
def _error(self, msg: str):
|
||||||
|
"""Log a non-fatal error message to stderr, which is repeated at program termination."""
|
||||||
|
print(msg, file=sys.stderr)
|
||||||
|
self.error_log.append(msg)
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _error_catcher(self, extra_info: Optional[str] = None):
|
||||||
|
"""
|
||||||
|
Context manager to catch, output and record NonfatalExceptions.
|
||||||
|
|
||||||
|
:param extra_info: String to prefix error message with."""
|
||||||
|
try:
|
||||||
|
yield
|
||||||
|
except NonfatalException as err:
|
||||||
|
if extra_info:
|
||||||
|
self._error('{}: {}'.format(extra_info, err))
|
||||||
|
else:
|
||||||
|
self._error('{}'.format(err))
|
||||||
|
|
||||||
def _sleep(self):
|
def _sleep(self):
|
||||||
"""Sleep a short, random time if self.sleep is set. Called before each request to the instagram.com."""
|
"""Sleep a short, random time if self.sleep is set. Called before each request to the instagram.com."""
|
||||||
if self.sleep:
|
if self.sleep:
|
||||||
time.sleep(random.uniform(0.25, 2.0))
|
time.sleep(random.uniform(0.5, 1.75))
|
||||||
|
|
||||||
def _get_and_write_raw(self, url: str, filename: str, tries: int = 3) -> None:
|
def _get_and_write_raw(self, url: str, filename: str, tries: int = 3) -> None:
|
||||||
|
"""Downloads raw data.
|
||||||
|
|
||||||
|
:raises ConnectionException: When download repeatedly failed."""
|
||||||
try:
|
try:
|
||||||
resp = self.get_anonymous_session().get(url, stream=True)
|
resp = self.get_anonymous_session().get(url, stream=True)
|
||||||
if resp.status_code == 200:
|
if resp.status_code == 200:
|
||||||
@ -168,27 +193,43 @@ class Instaloader:
|
|||||||
resp.raw.decode_content = True
|
resp.raw.decode_content = True
|
||||||
shutil.copyfileobj(resp.raw, file)
|
shutil.copyfileobj(resp.raw, file)
|
||||||
else:
|
else:
|
||||||
raise ConnectionException("Request returned HTTP error code {}.".format(resp.status_code))
|
raise ConnectionException("HTTP error code {}.".format(resp.status_code))
|
||||||
except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
|
except (urllib3.exceptions.HTTPError, requests.exceptions.RequestException, ConnectionException) as err:
|
||||||
print("URL: {}\n{}".format(url, err), file=sys.stderr)
|
error_string = "URL {}: {}".format(url, err)
|
||||||
if tries <= 1:
|
if tries <= 1:
|
||||||
raise NodeUnavailableException
|
raise ConnectionException(error_string)
|
||||||
|
else:
|
||||||
|
self._error(error_string)
|
||||||
self._sleep()
|
self._sleep()
|
||||||
self._get_and_write_raw(url, filename, tries - 1)
|
self._get_and_write_raw(url, filename, tries - 1)
|
||||||
|
|
||||||
def get_json(self, name: str, session: requests.Session = None,
|
def _get_json(self, url: str, params: Optional[Dict[str, Any]] = None,
|
||||||
max_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
|
session: Optional[requests.Session] = None, tries: int = 3) -> Dict[str, Any]:
|
||||||
"""Return JSON of a profile"""
|
"""JSON request to Instagram.
|
||||||
if session is None:
|
|
||||||
session = self.session
|
:param url: URL, relative to https://www.instagram.com/
|
||||||
|
:param params: GET parameters
|
||||||
|
:param session: Session to use, or None to use self.session
|
||||||
|
:param tries: Maximum number of attempts until an exception is raised
|
||||||
|
:return: Decoded response dictionary
|
||||||
|
"""
|
||||||
|
sess = session if session else self.session
|
||||||
|
try:
|
||||||
self._sleep()
|
self._sleep()
|
||||||
if not max_id:
|
resp = sess.get('https://www.instagram.com/' + url, params=params)
|
||||||
resp = session.get('https://www.instagram.com/' + name)
|
if resp.status_code == 404:
|
||||||
|
raise QueryReturnedNotFoundException("404")
|
||||||
|
if resp.status_code != 200:
|
||||||
|
raise ConnectionException("HTTP error code {}.".format(resp.status_code))
|
||||||
|
return resp.json()
|
||||||
|
except (ConnectionException, json.decoder.JSONDecodeError) as err:
|
||||||
|
error_string = "JSON Query to {}: {}".format(url, err)
|
||||||
|
if tries <= 1:
|
||||||
|
raise ConnectionException(error_string)
|
||||||
else:
|
else:
|
||||||
resp = session.get('https://www.instagram.com/' + name, params={'max_id': max_id})
|
self._error(error_string)
|
||||||
match = re.search('window\\._sharedData = .*<', resp.text)
|
self._sleep()
|
||||||
if match is not None:
|
self._get_json(url, params, sess, tries - 1)
|
||||||
return json.loads(match.group(0)[21:-2])
|
|
||||||
|
|
||||||
def default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
|
def default_http_header(self, empty_session_only: bool = False) -> Dict[str, str]:
|
||||||
"""Returns default HTTP header we use for requests."""
|
"""Returns default HTTP header we use for requests."""
|
||||||
@ -238,13 +279,10 @@ class Instaloader:
|
|||||||
tmpsession.headers['accept'] = '*/*'
|
tmpsession.headers['accept'] = '*/*'
|
||||||
if referer is not None:
|
if referer is not None:
|
||||||
tmpsession.headers['referer'] = referer
|
tmpsession.headers['referer'] = referer
|
||||||
self._sleep()
|
return self._get_json('graphql/query',
|
||||||
response = tmpsession.get('https://www.instagram.com/graphql/query',
|
|
||||||
params={'query_id': query_id,
|
params={'query_id': query_id,
|
||||||
'variables': json.dumps(variables, separators=(',', ':'))})
|
'variables': json.dumps(variables, separators=(',', ':'))},
|
||||||
if response.status_code != 200:
|
session=tmpsession)
|
||||||
raise ConnectionException("GraphQL query returned HTTP error code {}.".format(response.status_code))
|
|
||||||
return response.json()
|
|
||||||
|
|
||||||
def get_username_by_id(self, profile_id: int) -> str:
|
def get_username_by_id(self, profile_id: int) -> str:
|
||||||
"""To get the current username of a profile, given its unique ID, this function can be used."""
|
"""To get the current username of a profile, given its unique ID, this function can be used."""
|
||||||
@ -261,89 +299,49 @@ class Instaloader:
|
|||||||
raise LoginRequiredException("Login required to determine username (ID: " + str(profile_id) + ").")
|
raise LoginRequiredException("Login required to determine username (ID: " + str(profile_id) + ").")
|
||||||
else:
|
else:
|
||||||
shortcode = mediaid_to_shortcode(int(data['edges'][0]["node"]["id"]))
|
shortcode = mediaid_to_shortcode(int(data['edges'][0]["node"]["id"]))
|
||||||
data = self.get_json("p/" + shortcode)
|
return self.get_post_metadata(shortcode)['owner']['username']
|
||||||
return data['entry_data']['PostPage'][0]['graphql']['shortcode_media']['owner']['username']
|
|
||||||
|
|
||||||
def get_id_by_username(self, profile: str) -> int:
|
def get_id_by_username(self, profile: str) -> int:
|
||||||
"""Each Instagram profile has its own unique ID which stays unmodified even if a user changes
|
"""Each Instagram profile has its own unique ID which stays unmodified even if a user changes
|
||||||
his/her username. To get said ID, given the profile's name, you may call this function."""
|
his/her username. To get said ID, given the profile's name, you may call this function."""
|
||||||
data = self.get_json(profile, session=self.get_anonymous_session())
|
return int(self.get_profile_metadata(profile)['user']['id'])
|
||||||
if "ProfilePage" not in data["entry_data"]:
|
|
||||||
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
|
|
||||||
return int(data['entry_data']['ProfilePage'][0]['user']['id'])
|
|
||||||
|
|
||||||
def get_followers(self, profile: str) -> List[Dict[str, Any]]:
|
def graphql_node_list(self, query_id: int, query_variables: Dict[str, Any], query_referer: Optional[str],
|
||||||
|
edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
|
||||||
|
query_variables['first'] = 500
|
||||||
|
data = self.graphql_query(query_id, query_variables, query_referer)
|
||||||
|
while True:
|
||||||
|
edge_struct = edge_extractor(data)
|
||||||
|
yield from [edge['node'] for edge in edge_struct['edges']]
|
||||||
|
if edge_struct['page_info']['has_next_page']:
|
||||||
|
query_variables['after'] = edge_struct['page_info']['end_cursor']
|
||||||
|
data = self.graphql_query(query_id, query_variables, query_referer)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
def get_followers(self, profile: str) -> Iterator[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Retrieve list of followers of given profile.
|
Retrieve list of followers of given profile.
|
||||||
To use this, one needs to be logged in and private profiles have to be followed,
|
To use this, one needs to be logged in and private profiles have to be followed,
|
||||||
otherwise this returns an empty list.
|
otherwise this returns an empty list.
|
||||||
|
|
||||||
:param profile: Name of profile to lookup followers.
|
:param profile: Name of profile to lookup followers.
|
||||||
:return: List of followers (list of dictionaries).
|
|
||||||
"""
|
"""
|
||||||
profile_id = self.get_id_by_username(profile)
|
yield from self.graphql_node_list(17851374694183129, {'id': str(self.get_id_by_username(profile))},
|
||||||
data = self.graphql_query(17851374694183129, {'id': str(profile_id),
|
'https://www.instagram.com/' + profile + '/',
|
||||||
'first': 500},
|
lambda d: d['data']['user']['edge_followed_by'])
|
||||||
referer='https://www.instagram.com/' + profile + '/')
|
|
||||||
followers = []
|
|
||||||
while True:
|
|
||||||
edge_followed_by = data['data']['user']['edge_followed_by']
|
|
||||||
followers.extend([follower['node'] for follower in edge_followed_by['edges']])
|
|
||||||
page_info = edge_followed_by['page_info']
|
|
||||||
if page_info['has_next_page']:
|
|
||||||
data = self.graphql_query(17851374694183129, {'id': str(profile_id),
|
|
||||||
'first': 500,
|
|
||||||
'after': page_info['end_cursor']},
|
|
||||||
referer='https://www.instagram.com/' + profile + '/')
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
return followers
|
|
||||||
|
|
||||||
def get_followees(self, profile: str) -> List[Dict[str, Any]]:
|
def get_followees(self, profile: str) -> Iterator[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Retrieve list of followees (followings) of given profile.
|
Retrieve list of followees (followings) of given profile.
|
||||||
To use this, one needs to be logged in and private profiles have to be followed,
|
To use this, one needs to be logged in and private profiles have to be followed,
|
||||||
otherwise this returns an empty list.
|
otherwise this returns an empty list.
|
||||||
|
|
||||||
:param profile: Name of profile to lookup followers.
|
:param profile: Name of profile to lookup followers.
|
||||||
:return: List of followees (list of dictionaries).
|
|
||||||
"""
|
"""
|
||||||
profile_id = self.get_id_by_username(profile)
|
yield from self.graphql_node_list(17874545323001329, {'id': str(self.get_id_by_username(profile))},
|
||||||
data = self.graphql_query(17874545323001329, {'id': profile_id,
|
'https://www.instagram.com/' + profile + '/',
|
||||||
'first': 500},
|
lambda d: d['data']['user']['edge_follow'])
|
||||||
referer='https://www.instagram.com/' + profile + '/')
|
|
||||||
followees = []
|
|
||||||
while True:
|
|
||||||
edge_follow = data['data']['user']['edge_follow']
|
|
||||||
followees.extend([followee['node'] for followee in edge_follow['edges']])
|
|
||||||
page_info = edge_follow['page_info']
|
|
||||||
if page_info['has_next_page']:
|
|
||||||
data = self.graphql_query(17874545323001329, {'id': profile_id,
|
|
||||||
'first': 500,
|
|
||||||
'after': page_info['end_cursor']},
|
|
||||||
referer='https://www.instagram.com/' + profile + '/')
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
return followees
|
|
||||||
|
|
||||||
def get_comments(self, shortcode: str) -> List[Dict[str, Any]]:
|
|
||||||
"""Retrieve comments of node with given shortcode."""
|
|
||||||
data = self.graphql_query(17852405266163336, {'shortcode': shortcode,
|
|
||||||
'first': 500},
|
|
||||||
referer='https://www.instagram.com/p/' + shortcode + '/')
|
|
||||||
comments = []
|
|
||||||
while True:
|
|
||||||
edge_media_to_comment = data['data']['shortcode_media']['edge_media_to_comment']
|
|
||||||
comments.extend([comment['node'] for comment in edge_media_to_comment['edges']])
|
|
||||||
page_info = edge_media_to_comment['page_info']
|
|
||||||
if page_info['has_next_page']:
|
|
||||||
data = self.graphql_query(17852405266163336, {'shortcode': shortcode,
|
|
||||||
'first': 500,
|
|
||||||
'after': page_info['end_cursor']},
|
|
||||||
referer='https://www.instagram.com/p/' + shortcode + '/')
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
return comments
|
|
||||||
|
|
||||||
def download_pic(self, filename: str, url: str, mtime: datetime,
|
def download_pic(self, filename: str, url: str, mtime: datetime,
|
||||||
filename_suffix: Optional[str] = None) -> bool:
|
filename_suffix: Optional[str] = None) -> bool:
|
||||||
@ -361,6 +359,12 @@ class Instaloader:
|
|||||||
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def get_comments(self, shortcode: str) -> Iterator[Dict[str, Any]]:
|
||||||
|
"""Retrieve comments of node with given shortcode."""
|
||||||
|
yield from self.graphql_node_list(17852405266163336, {'shortcode': shortcode},
|
||||||
|
'https://www.instagram.com/p/' + shortcode + '/',
|
||||||
|
lambda d: d['data']['shortcode_media']['edge_media_to_comment'])
|
||||||
|
|
||||||
def update_comments(self, filename: str, shortcode: str) -> None:
|
def update_comments(self, filename: str, shortcode: str) -> None:
|
||||||
filename += '_comments.json'
|
filename += '_comments.json'
|
||||||
try:
|
try:
|
||||||
@ -393,7 +397,7 @@ class Instaloader:
|
|||||||
pcaption = "txt"
|
pcaption = "txt"
|
||||||
else:
|
else:
|
||||||
pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']'
|
pcaption = '[' + ((pcaption[:29] + u"\u2026") if len(pcaption) > 31 else pcaption) + ']'
|
||||||
try:
|
with suppress(FileNotFoundError):
|
||||||
with open(filename, 'rb') as file:
|
with open(filename, 'rb') as file:
|
||||||
file_caption = file.read()
|
file_caption = file.read()
|
||||||
if file_caption.replace(b'\r\n', b'\n') == caption.replace(b'\r\n', b'\n'):
|
if file_caption.replace(b'\r\n', b'\n') == caption.replace(b'\r\n', b'\n'):
|
||||||
@ -416,8 +420,6 @@ class Instaloader:
|
|||||||
self._log(pcaption + ' updated', end=' ', flush=True)
|
self._log(pcaption + ' updated', end=' ', flush=True)
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
self._log('txt updated', end=' ', flush=True)
|
self._log('txt updated', end=' ', flush=True)
|
||||||
except FileNotFoundError:
|
|
||||||
pass
|
|
||||||
try:
|
try:
|
||||||
self._log(pcaption, end=' ', flush=True)
|
self._log(pcaption, end=' ', flush=True)
|
||||||
except UnicodeEncodeError:
|
except UnicodeEncodeError:
|
||||||
@ -463,6 +465,7 @@ class Instaloader:
|
|||||||
url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index + offset:]
|
url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index + offset:]
|
||||||
self._get_and_write_raw(url, filename)
|
self._get_and_write_raw(url, filename)
|
||||||
os.utime(filename, (datetime.now().timestamp(), date_object.timestamp()))
|
os.utime(filename, (datetime.now().timestamp(), date_object.timestamp()))
|
||||||
|
self._log('') # log output of _get_and_write_raw() does not produce \n
|
||||||
|
|
||||||
def save_session_to_file(self, filename: Optional[str] = None) -> None:
|
def save_session_to_file(self, filename: Optional[str] = None) -> None:
|
||||||
"""Saves requests.Session object."""
|
"""Saves requests.Session object."""
|
||||||
@ -495,14 +498,11 @@ class Instaloader:
|
|||||||
self.session = session
|
self.session = session
|
||||||
self.username = username
|
self.username = username
|
||||||
|
|
||||||
def test_login(self, session: requests.Session) -> Optional[str]:
|
def test_login(self, session: Optional[requests.Session]) -> Optional[str]:
|
||||||
"""Returns the Instagram username to which given requests.Session object belongs, or None."""
|
"""Returns the Instagram username to which given requests.Session object belongs, or None."""
|
||||||
if self.session is None:
|
if session:
|
||||||
return
|
data = self._get_json('', params={'__a': 1}, session=session)
|
||||||
data = self.get_json(str(), session=session)
|
return data['graphql']['user']['username'] if 'graphql' in data else None
|
||||||
if data['config']['viewer'] is None:
|
|
||||||
return
|
|
||||||
return data['config']['viewer']['username']
|
|
||||||
|
|
||||||
def login(self, user: str, passwd: str) -> None:
|
def login(self, user: str, passwd: str) -> None:
|
||||||
"""Log in to instagram with given username and password and internally store session object"""
|
"""Log in to instagram with given username and password and internally store session object"""
|
||||||
@ -527,51 +527,34 @@ class Instaloader:
|
|||||||
else:
|
else:
|
||||||
raise ConnectionException('Login error! Connection error!')
|
raise ConnectionException('Login error! Connection error!')
|
||||||
|
|
||||||
def get_feed_json(self, end_cursor: str = None) -> Dict[str, Any]:
|
def get_post_metadata(self, shortcode: str, tries: int = 3) -> Dict[str, Any]:
|
||||||
"""
|
"""Get full metadata of the post associated with given shortcode.
|
||||||
Get JSON of the user's feed.
|
|
||||||
|
|
||||||
:param end_cursor: The end cursor, as from json["feed"]["media"]["page_info"]["end_cursor"]
|
:raises NodeUnavailableException: If the data cannot be retrieved."""
|
||||||
:return: JSON
|
pic_json = self._get_json("p/{0}/".format(shortcode), params={'__a': 1})
|
||||||
"""
|
|
||||||
if end_cursor is None:
|
|
||||||
return self.get_json(str())["entry_data"]["FeedPage"][0]
|
|
||||||
return self.graphql_query(17863003771166879, {'fetch_media_item_count': 12,
|
|
||||||
'fetch_media_item_cursor': end_cursor,
|
|
||||||
'fetch_comment_count': 4,
|
|
||||||
'fetch_like': 10})
|
|
||||||
|
|
||||||
def get_node_metadata(self, node_code: str, tries: int = 3) -> Dict[str, Any]:
|
|
||||||
pic_json = self.get_json("p/" + node_code)
|
|
||||||
try:
|
try:
|
||||||
media = pic_json["entry_data"]["PostPage"][0]["graphql"]["shortcode_media"] \
|
media = pic_json["graphql"]["shortcode_media"] if "graphql" in pic_json else pic_json["media"]
|
||||||
if "graphql" in pic_json["entry_data"]["PostPage"][0] \
|
|
||||||
else pic_json["entry_data"]["PostPage"][0]["media"]
|
|
||||||
except KeyError as err:
|
except KeyError as err:
|
||||||
print(err, file=sys.stderr)
|
|
||||||
print(json.dumps(pic_json, indent=4), file=sys.stderr)
|
print(json.dumps(pic_json, indent=4), file=sys.stderr)
|
||||||
|
error_string = "Post {}: {}".format(shortcode, err)
|
||||||
if tries <= 1:
|
if tries <= 1:
|
||||||
raise NodeUnavailableException
|
raise NodeUnavailableException(error_string)
|
||||||
|
else:
|
||||||
|
self._error(error_string)
|
||||||
self._sleep()
|
self._sleep()
|
||||||
media = self.get_node_metadata(node_code, tries - 1)
|
media = self.get_post_metadata(shortcode, tries - 1)
|
||||||
return media
|
return media
|
||||||
|
|
||||||
def get_location(self, node_code: str) -> Dict[str, str]:
|
def get_location(self, post_metadata: Dict[str, Any]) -> Optional[Dict[str, str]]:
|
||||||
try:
|
if post_metadata["location"] is not None:
|
||||||
media = self.get_node_metadata(node_code)
|
location_json = self._get_json("explore/locations/{0}/".format(post_metadata["location"]["id"]),
|
||||||
except NodeUnavailableException:
|
params={'__a': 1})
|
||||||
print("Unable to lookup location for node \"https://www.instagram.com/p/{}/\".".format(node_code),
|
return location_json["location"]
|
||||||
file=sys.stderr)
|
|
||||||
return dict()
|
|
||||||
if media["location"] is not None:
|
|
||||||
location_json = self.get_json("explore/locations/" +
|
|
||||||
media["location"]["id"])
|
|
||||||
return location_json["entry_data"]["LocationsPage"][0]["location"]
|
|
||||||
|
|
||||||
def download_node(self, node: Dict[str, Any], profile: Optional[str], target: str,
|
def download_post(self, node: Dict[str, Any], profile: Optional[str], target: str,
|
||||||
download_videos: bool = True, geotags: bool = False, download_comments: bool = False) -> bool:
|
download_videos: bool = True, geotags: bool = False, download_comments: bool = False) -> bool:
|
||||||
"""
|
"""
|
||||||
Download everything associated with one instagram node, i.e. picture, caption and video.
|
Download everything associated with one instagram post node, i.e. picture, caption and video.
|
||||||
|
|
||||||
:param node: Node, as from media->nodes list in instagram's JSONs
|
:param node: Node, as from media->nodes list in instagram's JSONs
|
||||||
:param profile: Name of profile to which this node belongs
|
:param profile: Name of profile to which this node belongs
|
||||||
@ -585,36 +568,34 @@ class Instaloader:
|
|||||||
needs_profilename = (format_string_contains_key(self.dirname_pattern, 'profile') or
|
needs_profilename = (format_string_contains_key(self.dirname_pattern, 'profile') or
|
||||||
format_string_contains_key(self.filename_pattern, 'profile'))
|
format_string_contains_key(self.filename_pattern, 'profile'))
|
||||||
shortcode = node['shortcode'] if 'shortcode' in node else node['code']
|
shortcode = node['shortcode'] if 'shortcode' in node else node['code']
|
||||||
|
post_metadata = None
|
||||||
if needs_profilename:
|
if needs_profilename:
|
||||||
if already_has_profilename:
|
if already_has_profilename:
|
||||||
profilename = profile if profile is not None else node['owner']['username']
|
profilename = profile if profile is not None else node['owner']['username']
|
||||||
|
profilename = profilename.lower()
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
metadata = self.get_node_metadata(shortcode)
|
post_metadata = self.get_post_metadata(shortcode)
|
||||||
profilename = metadata['owner']['username']
|
profilename = post_metadata['owner']['username'].lower()
|
||||||
except NodeUnavailableException:
|
except (NonfatalException, KeyError) as err:
|
||||||
print("Unable to gather profilename for node "
|
self._error("Unable to get owner name of post {}: {} -- using \'UNKNOWN\'.".format(shortcode, err))
|
||||||
"\"https://www.instagram.com/p/{}/\".".format(shortcode), file=sys.stderr)
|
|
||||||
profilename = 'UNKNOWN'
|
profilename = 'UNKNOWN'
|
||||||
else:
|
else:
|
||||||
profilename = None
|
profilename = None
|
||||||
profilename = profilename.lower() if profilename else None
|
|
||||||
date = datetime.fromtimestamp(node["date"] if "date" in node else node["taken_at_timestamp"])
|
date = datetime.fromtimestamp(node["date"] if "date" in node else node["taken_at_timestamp"])
|
||||||
dirname = self.dirname_pattern.format(profile=profilename, target=target.lower())
|
dirname = self.dirname_pattern.format(profile=profilename, target=target.lower())
|
||||||
filename = dirname + '/' + self.filename_pattern.format(profile=profilename, target=target.lower(),
|
filename = dirname + '/' + self.filename_pattern.format(profile=profilename, target=target.lower(),
|
||||||
date=date,
|
date=date,
|
||||||
shortcode=shortcode)
|
shortcode=shortcode)
|
||||||
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
||||||
|
url = node["display_url"] if "display_url" in node else node["display_src"]
|
||||||
if '__typename' in node:
|
if '__typename' in node:
|
||||||
if node['__typename'] == 'GraphSidecar':
|
if node['__typename'] == 'GraphSidecar':
|
||||||
self._sleep()
|
if not post_metadata:
|
||||||
sidecar_data = self.session.get('https://www.instagram.com/p/' + shortcode + '/',
|
post_metadata = self.get_post_metadata(shortcode)
|
||||||
params={'__a': 1}).json()
|
|
||||||
edge_number = 1
|
edge_number = 1
|
||||||
downloaded = True
|
downloaded = True
|
||||||
media = sidecar_data["graphql"]["shortcode_media"] if "graphql" in sidecar_data else sidecar_data[
|
for edge in post_metadata['edge_sidecar_to_children']['edges']:
|
||||||
"media"]
|
|
||||||
for edge in media['edge_sidecar_to_children']['edges']:
|
|
||||||
edge_downloaded = self.download_pic(filename=filename,
|
edge_downloaded = self.download_pic(filename=filename,
|
||||||
url=edge['node']['display_url'],
|
url=edge['node']['display_url'],
|
||||||
mtime=date,
|
mtime=date,
|
||||||
@ -622,7 +603,6 @@ class Instaloader:
|
|||||||
downloaded = downloaded and edge_downloaded
|
downloaded = downloaded and edge_downloaded
|
||||||
edge_number += 1
|
edge_number += 1
|
||||||
elif node['__typename'] in ['GraphImage', 'GraphVideo']:
|
elif node['__typename'] in ['GraphImage', 'GraphVideo']:
|
||||||
url = node["display_url"] if "display_url" in node else node["display_src"]
|
|
||||||
downloaded = self.download_pic(filename=filename,
|
downloaded = self.download_pic(filename=filename,
|
||||||
url=url,
|
url=url,
|
||||||
mtime=date)
|
mtime=date)
|
||||||
@ -631,7 +611,7 @@ class Instaloader:
|
|||||||
downloaded = False
|
downloaded = False
|
||||||
else:
|
else:
|
||||||
# Node is an old image or video.
|
# Node is an old image or video.
|
||||||
downloaded = self.download_pic(filename=filename, url=node["display_src"], mtime=date)
|
downloaded = self.download_pic(filename=filename, url=url, mtime=date)
|
||||||
if "edge_media_to_caption" in node and node["edge_media_to_caption"]["edges"]:
|
if "edge_media_to_caption" in node and node["edge_media_to_caption"]["edges"]:
|
||||||
self.save_caption(filename, date, node["edge_media_to_caption"]["edges"][0]["node"]["text"])
|
self.save_caption(filename, date, node["edge_media_to_caption"]["edges"][0]["node"]["text"])
|
||||||
elif "caption" in node:
|
elif "caption" in node:
|
||||||
@ -639,12 +619,15 @@ class Instaloader:
|
|||||||
else:
|
else:
|
||||||
self._log("<no caption>", end=' ', flush=True)
|
self._log("<no caption>", end=' ', flush=True)
|
||||||
if node["is_video"] and download_videos:
|
if node["is_video"] and download_videos:
|
||||||
video_data = self.get_json('p/' + shortcode)
|
if not post_metadata:
|
||||||
|
post_metadata = self.get_post_metadata(shortcode)
|
||||||
self.download_pic(filename=filename,
|
self.download_pic(filename=filename,
|
||||||
url=video_data['entry_data']['PostPage'][0]['graphql']['shortcode_media']['video_url'],
|
url=post_metadata['video_url'],
|
||||||
mtime=date)
|
mtime=date)
|
||||||
if geotags:
|
if geotags:
|
||||||
location = self.get_location(shortcode)
|
if not post_metadata:
|
||||||
|
post_metadata = self.get_post_metadata(shortcode)
|
||||||
|
location = self.get_location(post_metadata)
|
||||||
if location:
|
if location:
|
||||||
self.save_location(filename, location, date)
|
self.save_location(filename, location, date)
|
||||||
if download_comments:
|
if download_comments:
|
||||||
@ -652,6 +635,41 @@ class Instaloader:
|
|||||||
self._log()
|
self._log()
|
||||||
return downloaded
|
return downloaded
|
||||||
|
|
||||||
|
def get_stories(self, userids: Optional[List[int]] = None) -> Iterator[Dict[str, Any]]:
|
||||||
|
"""Get available stories from followees or all stories of users whose ID are given.
|
||||||
|
Does not mark stories as seen.
|
||||||
|
To use this, one needs to be logged in
|
||||||
|
|
||||||
|
:param userids: List of user IDs to be processed in terms of downloading their stories, or None.
|
||||||
|
"""
|
||||||
|
tempsession = copy_session(self.session)
|
||||||
|
header = tempsession.headers
|
||||||
|
header['User-Agent'] = 'Instagram 10.3.2 (iPhone7,2; iPhone OS 9_3_3; en_US; en-US; scale=2.00; 750x1334) ' \
|
||||||
|
'AppleWebKit/420+'
|
||||||
|
del header['Host']
|
||||||
|
del header['Origin']
|
||||||
|
del header['X-Instagram-AJAX']
|
||||||
|
del header['X-Requested-With']
|
||||||
|
|
||||||
|
def _get(url):
|
||||||
|
self._sleep()
|
||||||
|
resp = tempsession.get(url)
|
||||||
|
if resp.status_code != 200:
|
||||||
|
raise ConnectionException('Failed to fetch stories.')
|
||||||
|
return json.loads(resp.text)
|
||||||
|
|
||||||
|
url_reel_media = 'https://i.instagram.com/api/v1/feed/user/{0}/reel_media/'
|
||||||
|
url_reels_tray = 'https://i.instagram.com/api/v1/feed/reels_tray/'
|
||||||
|
if userids is not None:
|
||||||
|
for userid in userids:
|
||||||
|
yield _get(url_reel_media.format(userid))
|
||||||
|
else:
|
||||||
|
data = _get(url_reels_tray)
|
||||||
|
if not 'tray' in data:
|
||||||
|
raise BadResponseException('Bad story reel JSON.')
|
||||||
|
for user in data["tray"]:
|
||||||
|
yield user if "items" in user else _get(url_reel_media.format(user['user']['pk']))
|
||||||
|
|
||||||
def download_stories(self,
|
def download_stories(self,
|
||||||
userids: Optional[List[int]] = None,
|
userids: Optional[List[int]] = None,
|
||||||
download_videos: bool = True,
|
download_videos: bool = True,
|
||||||
@ -671,35 +689,7 @@ class Instaloader:
|
|||||||
if self.username is None:
|
if self.username is None:
|
||||||
raise LoginRequiredException('Login required to download stories')
|
raise LoginRequiredException('Login required to download stories')
|
||||||
|
|
||||||
tempsession = copy_session(self.session)
|
for user_stories in self.get_stories(userids):
|
||||||
header = tempsession.headers
|
|
||||||
header['User-Agent'] = 'Instagram 10.3.2 (iPhone7,2; iPhone OS 9_3_3; en_US; en-US; scale=2.00; 750x1334) ' \
|
|
||||||
'AppleWebKit/420+'
|
|
||||||
del header['Host']
|
|
||||||
del header['Origin']
|
|
||||||
del header['X-Instagram-AJAX']
|
|
||||||
del header['X-Requested-With']
|
|
||||||
|
|
||||||
def _user_stories():
|
|
||||||
def _get(url):
|
|
||||||
self._sleep()
|
|
||||||
resp = tempsession.get(url)
|
|
||||||
if resp.status_code != 200:
|
|
||||||
raise ConnectionException('Failed to fetch stories.')
|
|
||||||
return json.loads(resp.text)
|
|
||||||
url_reel_media = 'https://i.instagram.com/api/v1/feed/user/{0}/reel_media/'
|
|
||||||
url_reels_tray = 'https://i.instagram.com/api/v1/feed/reels_tray/'
|
|
||||||
if userids is not None:
|
|
||||||
for userid in userids:
|
|
||||||
yield _get(url_reel_media.format(userid))
|
|
||||||
else:
|
|
||||||
data = _get(url_reels_tray)
|
|
||||||
if not 'tray' in data:
|
|
||||||
raise BadResponseException('Bad story reel JSON.')
|
|
||||||
for user in data["tray"]:
|
|
||||||
yield user if "items" in user else _get(url_reel_media.format(user['user']['pk']))
|
|
||||||
|
|
||||||
for user_stories in _user_stories():
|
|
||||||
if "items" not in user_stories:
|
if "items" not in user_stories:
|
||||||
continue
|
continue
|
||||||
name = user_stories["user"]["username"].lower()
|
name = user_stories["user"]["username"].lower()
|
||||||
@ -710,7 +700,6 @@ class Instaloader:
|
|||||||
self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
self._sleep()
|
|
||||||
shortcode = item["code"] if "code" in item else "no_code"
|
shortcode = item["code"] if "code" in item else "no_code"
|
||||||
|
|
||||||
date_float = item["device_timestamp"] if "device_timestamp" in item else item["taken_at"]
|
date_float = item["device_timestamp"] if "device_timestamp" in item else item["taken_at"]
|
||||||
@ -726,7 +715,7 @@ class Instaloader:
|
|||||||
date=date,
|
date=date,
|
||||||
shortcode=shortcode)
|
shortcode=shortcode)
|
||||||
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
||||||
try:
|
with self._error_catcher('Download story {} from user {}'.format(shortcode, name)):
|
||||||
if "image_versions2" in item:
|
if "image_versions2" in item:
|
||||||
url = item["image_versions2"]["candidates"][0]["url"]
|
url = item["image_versions2"]["candidates"][0]["url"]
|
||||||
downloaded = self.download_pic(filename=filename,
|
downloaded = self.download_pic(filename=filename,
|
||||||
@ -746,12 +735,6 @@ class Instaloader:
|
|||||||
downloaded = self.download_pic(filename=filename,
|
downloaded = self.download_pic(filename=filename,
|
||||||
url=item["video_versions"][0]["url"],
|
url=item["video_versions"][0]["url"],
|
||||||
mtime=date)
|
mtime=date)
|
||||||
if "video_duration" in item and self.sleep and downloaded:
|
|
||||||
time.sleep(item["video_duration"])
|
|
||||||
except NodeUnavailableException:
|
|
||||||
print("Unable to download node \"https://www.instagram.com/p/{}/\" of user {} from stories."
|
|
||||||
.format(shortcode, name), file=sys.stderr)
|
|
||||||
continue
|
|
||||||
if item["story_locations"]:
|
if item["story_locations"]:
|
||||||
location = item["story_locations"][0]["location"]
|
location = item["story_locations"][0]["location"]
|
||||||
if location:
|
if location:
|
||||||
@ -760,31 +743,11 @@ class Instaloader:
|
|||||||
if fast_update and not downloaded:
|
if fast_update and not downloaded:
|
||||||
break
|
break
|
||||||
|
|
||||||
def download_feed_pics(self, max_count: int = None, fast_update: bool = False,
|
def get_feed_posts(self) -> Iterator[Dict[str, Any]]:
|
||||||
filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None,
|
"""Get Posts of the user's feed."""
|
||||||
download_videos: bool = True, geotags: bool = False,
|
|
||||||
download_comments: bool = False) -> None:
|
|
||||||
"""
|
|
||||||
Download pictures from the user's feed.
|
|
||||||
|
|
||||||
Example to download up to the 20 pics the user last liked:
|
data = self._get_json('', params={'__a': 1})
|
||||||
>>> loader = Instaloader()
|
|
||||||
>>> loader.load_session_from_file('USER')
|
|
||||||
>>> loader.download_feed_pics(max_count=20, fast_update=True,
|
|
||||||
>>> filter_func=lambda node:
|
|
||||||
>>> not node["likes"]["viewer_has_liked"]
|
|
||||||
>>> if "likes" in node else
|
|
||||||
>>> not node["viewer_has_liked"])
|
|
||||||
|
|
||||||
:param max_count: Maximum count of pictures to download
|
|
||||||
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
|
||||||
:param filter_func: function(node), which returns True if given picture should not be downloaded
|
|
||||||
:param download_videos: True, if videos should be downloaded
|
|
||||||
:param geotags: Download geotags
|
|
||||||
:param download_comments: Update comments
|
|
||||||
"""
|
|
||||||
data = self.get_feed_json()
|
|
||||||
count = 1
|
|
||||||
while True:
|
while True:
|
||||||
if "graphql" in data:
|
if "graphql" in data:
|
||||||
is_edge = True
|
is_edge = True
|
||||||
@ -795,34 +758,64 @@ class Instaloader:
|
|||||||
else:
|
else:
|
||||||
is_edge = False
|
is_edge = False
|
||||||
feed = data["feed"]["media"]
|
feed = data["feed"]["media"]
|
||||||
for edge_or_node in feed["edges"] if is_edge else feed["nodes"]:
|
|
||||||
|
if is_edge:
|
||||||
|
yield from [edge["node"] for edge in feed["edges"]]
|
||||||
|
else:
|
||||||
|
yield from [node for node in feed["nodes"]]
|
||||||
|
|
||||||
|
if not feed["page_info"]["has_next_page"]:
|
||||||
|
break
|
||||||
|
data = self.graphql_query(17863003771166879, {'fetch_media_item_count': 12,
|
||||||
|
'fetch_media_item_cursor': feed["page_info"]["end_cursor"],
|
||||||
|
'fetch_comment_count': 4,
|
||||||
|
'fetch_like': 10})
|
||||||
|
|
||||||
|
def download_feed_posts(self, max_count: int = None, fast_update: bool = False,
|
||||||
|
filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None,
|
||||||
|
download_videos: bool = True, geotags: bool = False,
|
||||||
|
download_comments: bool = False) -> None:
|
||||||
|
"""
|
||||||
|
Download pictures from the user's feed.
|
||||||
|
|
||||||
|
Example to download up to the 20 pics the user last liked:
|
||||||
|
>>> loader = Instaloader()
|
||||||
|
>>> loader.load_session_from_file('USER')
|
||||||
|
>>> loader.download_feed_posts(max_count=20, fast_update=True,
|
||||||
|
>>> filter_func=lambda post:
|
||||||
|
>>> not post["likes"]["viewer_has_liked"]
|
||||||
|
>>> if "likes" in post else
|
||||||
|
>>> not post["viewer_has_liked"])
|
||||||
|
|
||||||
|
:param max_count: Maximum count of pictures to download
|
||||||
|
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
||||||
|
:param filter_func: function(post), which returns True if given picture should not be downloaded
|
||||||
|
:param download_videos: True, if videos should be downloaded
|
||||||
|
:param geotags: Download geotags
|
||||||
|
:param download_comments: Update comments
|
||||||
|
"""
|
||||||
|
count = 1
|
||||||
|
for post in self.get_feed_posts():
|
||||||
if max_count is not None and count > max_count:
|
if max_count is not None and count > max_count:
|
||||||
return
|
break
|
||||||
node = edge_or_node["node"] if is_edge else edge_or_node
|
name = post["owner"]["username"]
|
||||||
name = node["owner"]["username"]
|
if filter_func is not None and filter_func(post):
|
||||||
if filter_func is not None and filter_func(node):
|
|
||||||
self._log("<pic by %s skipped>" % name, flush=True)
|
self._log("<pic by %s skipped>" % name, flush=True)
|
||||||
continue
|
continue
|
||||||
self._log("[%3i] %s " % (count, name), end="", flush=True)
|
self._log("[%3i] %s " % (count, name), end="", flush=True)
|
||||||
count += 1
|
count += 1
|
||||||
try:
|
with self._error_catcher('Download feed'):
|
||||||
downloaded = self.download_node(node, profile=name, target=':feed',
|
downloaded = self.download_post(post, profile=name, target=':feed',
|
||||||
download_videos=download_videos, geotags=geotags,
|
download_videos=download_videos, geotags=geotags,
|
||||||
download_comments=download_comments)
|
download_comments=download_comments)
|
||||||
except NodeUnavailableException:
|
|
||||||
print("Unable to download node \"https://www.instagram.com/p/{}/\" of user {} from feed."
|
|
||||||
.format(node['shortcode'], name), file=sys.stderr)
|
|
||||||
continue
|
|
||||||
if fast_update and not downloaded:
|
if fast_update and not downloaded:
|
||||||
return
|
|
||||||
if not feed["page_info"]["has_next_page"]:
|
|
||||||
break
|
break
|
||||||
data = self.get_feed_json(end_cursor=feed["page_info"]["end_cursor"])
|
|
||||||
|
|
||||||
def get_hashtag_json(self, hashtag: str,
|
def get_hashtag_posts(self, hashtag: str) -> Iterator[Dict[str, Any]]:
|
||||||
max_id: Optional[str] = None) -> Optional[Dict[str, Any]]:
|
"""Get Posts associated with a #hashtag."""
|
||||||
"""Return JSON of a #hashtag"""
|
yield from self.graphql_node_list(17875800862117404, {'tag_name': hashtag},
|
||||||
return self.get_json(name='explore/tags/{0}/'.format(hashtag), max_id=max_id)
|
'https://www.instagram.com/explore/tags/' + hashtag + '/',
|
||||||
|
lambda d: d['data']['hashtag']['edge_hashtag_to_media'])
|
||||||
|
|
||||||
def download_hashtag(self, hashtag: str,
|
def download_hashtag(self, hashtag: str,
|
||||||
max_count: Optional[int] = None,
|
max_count: Optional[int] = None,
|
||||||
@ -837,48 +830,38 @@ class Instaloader:
|
|||||||
|
|
||||||
:param hashtag: Hashtag to download, without leading '#'
|
:param hashtag: Hashtag to download, without leading '#'
|
||||||
:param max_count: Maximum count of pictures to download
|
:param max_count: Maximum count of pictures to download
|
||||||
:param filter_func: function(node), which returns True if given picture should not be downloaded
|
:param filter_func: function(post), which returns True if given picture should not be downloaded
|
||||||
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
:param fast_update: If true, abort when first already-downloaded picture is encountered
|
||||||
:param download_videos: True, if videos should be downloaded
|
:param download_videos: True, if videos should be downloaded
|
||||||
:param geotags: Download geotags
|
:param geotags: Download geotags
|
||||||
:param download_comments: Update comments
|
:param download_comments: Update comments
|
||||||
"""
|
"""
|
||||||
data = self.get_hashtag_json(hashtag)
|
|
||||||
count = 1
|
count = 1
|
||||||
while data:
|
for post in self.get_hashtag_posts(hashtag):
|
||||||
for node in data['entry_data']['TagPage'][0]['tag']['media']['nodes']:
|
|
||||||
if max_count is not None and count > max_count:
|
if max_count is not None and count > max_count:
|
||||||
return
|
break
|
||||||
self._log('[{0:3d}] #{1} '.format(count, hashtag), end='', flush=True)
|
self._log('[{0:3d}] #{1} '.format(count, hashtag), end='', flush=True)
|
||||||
if filter_func is not None and filter_func(node):
|
if filter_func is not None and filter_func(post):
|
||||||
self._log('<skipped>')
|
self._log('<skipped>')
|
||||||
continue
|
continue
|
||||||
count += 1
|
count += 1
|
||||||
try:
|
with self._error_catcher('Download hashtag #{}'.format(hashtag)):
|
||||||
downloaded = self.download_node(node=node, profile=None, target='#'+hashtag,
|
downloaded = self.download_post(node=post, profile=None, target='#' + hashtag,
|
||||||
download_videos=download_videos, geotags=geotags,
|
download_videos=download_videos, geotags=geotags,
|
||||||
download_comments=download_comments)
|
download_comments=download_comments)
|
||||||
except NodeUnavailableException:
|
|
||||||
print("Unable to download node \"https://www.instagram.com/p/{}/\" "
|
|
||||||
"while downloading hashtag \"{}\".".format(node['shortcode'], hashtag), file=sys.stderr)
|
|
||||||
continue
|
|
||||||
if fast_update and not downloaded:
|
if fast_update and not downloaded:
|
||||||
return
|
|
||||||
if data['entry_data']['TagPage'][0]['tag']['media']['page_info']['has_next_page']:
|
|
||||||
data = self.get_hashtag_json(hashtag,
|
|
||||||
max_id=data['entry_data']['TagPage'][0]['tag']['media']['page_info'][
|
|
||||||
'end_cursor'])
|
|
||||||
else:
|
|
||||||
break
|
break
|
||||||
|
|
||||||
def check_id(self, profile: str, json_data: Dict[str, Any]) -> Tuple[str, int]:
|
def check_profile_id(self, profile: str, profile_metadata: Optional[Dict[str, Any]] = None) -> Tuple[str, int]:
|
||||||
"""
|
"""
|
||||||
Consult locally stored ID of profile with given name, check whether ID matches and whether name
|
Consult locally stored ID of profile with given name, check whether ID matches and whether name
|
||||||
has changed and return current name of the profile, and store ID of profile.
|
has changed and return current name of the profile, and store ID of profile.
|
||||||
|
|
||||||
|
:param profile: Profile name
|
||||||
|
:param profile_metadata: The profile's metadata (get_profile_metadata()), or None if the profile was not found
|
||||||
:return: current profile name, profile id
|
:return: current profile name, profile id
|
||||||
"""
|
"""
|
||||||
profile_exists = "ProfilePage" in json_data["entry_data"]
|
profile_exists = profile_metadata is not None
|
||||||
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
|
||||||
format_string_contains_key(self.dirname_pattern, 'target'))):
|
format_string_contains_key(self.dirname_pattern, 'target'))):
|
||||||
id_filename = '{0}/id'.format(self.dirname_pattern.format(profile=profile.lower(),
|
id_filename = '{0}/id'.format(self.dirname_pattern.format(profile=profile.lower(),
|
||||||
@ -889,7 +872,7 @@ class Instaloader:
|
|||||||
with open(id_filename, 'rb') as id_file:
|
with open(id_filename, 'rb') as id_file:
|
||||||
profile_id = int(id_file.read())
|
profile_id = int(id_file.read())
|
||||||
if (not profile_exists) or \
|
if (not profile_exists) or \
|
||||||
(profile_id != int(json_data['entry_data']['ProfilePage'][0]['user']['id'])):
|
(profile_id != int(profile_metadata['user']['id'])):
|
||||||
if profile_exists:
|
if profile_exists:
|
||||||
self._log("Profile {0} does not match the stored unique ID {1}.".format(profile, profile_id))
|
self._log("Profile {0} does not match the stored unique ID {1}.".format(profile, profile_id))
|
||||||
else:
|
else:
|
||||||
@ -913,75 +896,95 @@ class Instaloader:
|
|||||||
os.makedirs(self.dirname_pattern.format(profile=profile.lower(),
|
os.makedirs(self.dirname_pattern.format(profile=profile.lower(),
|
||||||
target=profile.lower()), exist_ok=True)
|
target=profile.lower()), exist_ok=True)
|
||||||
with open(id_filename, 'w') as text_file:
|
with open(id_filename, 'w') as text_file:
|
||||||
profile_id = json_data['entry_data']['ProfilePage'][0]['user']['id']
|
profile_id = profile_metadata['user']['id']
|
||||||
text_file.write(profile_id + "\n")
|
text_file.write(profile_id + "\n")
|
||||||
self._log("Stored ID {0} for profile {1}.".format(profile_id, profile))
|
self._log("Stored ID {0} for profile {1}.".format(profile_id, profile))
|
||||||
return profile, profile_id
|
return profile, profile_id
|
||||||
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
|
raise ProfileNotExistsException("Profile {0} does not exist.".format(profile))
|
||||||
|
|
||||||
def download(self, name: str,
|
def get_profile_metadata(self, profile_name: str) -> Dict[str, Any]:
|
||||||
|
"""Retrieves a profile's metadata, for use with e.g. get_profile_posts() and check_profile_id()."""
|
||||||
|
try:
|
||||||
|
return self._get_json('{}/'.format(profile_name), params={'__a': 1})
|
||||||
|
except QueryReturnedNotFoundException:
|
||||||
|
raise ProfileNotExistsException('Profile {} does not exist.'.format(profile_name))
|
||||||
|
|
||||||
|
def get_profile_posts(self, profile_metadata: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
|
||||||
|
"""Retrieve all posts from a profile."""
|
||||||
|
yield from profile_metadata['user']['media']['nodes']
|
||||||
|
has_next_page = profile_metadata['user']['media']['page_info']['has_next_page']
|
||||||
|
end_cursor = profile_metadata['user']['media']['page_info']['end_cursor']
|
||||||
|
while has_next_page:
|
||||||
|
data = self.graphql_query(17888483320059182, {'id': profile_metadata['user']['id'],
|
||||||
|
'first': 500,
|
||||||
|
'after': end_cursor},
|
||||||
|
'https://www.instagram.com/{0}/'.format(profile_metadata['user']['username']))
|
||||||
|
media = data['data']['user']['edge_owner_to_timeline_media']
|
||||||
|
yield from [edge['node'] for edge in media['edges']]
|
||||||
|
has_next_page = media['page_info']['has_next_page']
|
||||||
|
end_cursor = media['page_info']['end_cursor']
|
||||||
|
|
||||||
|
def download_profile(self, name: str,
|
||||||
profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
|
profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
|
||||||
download_comments: bool = False, fast_update: bool = False,
|
download_comments: bool = False, fast_update: bool = False,
|
||||||
download_stories: bool = False, download_stories_only: bool = False) -> None:
|
download_stories: bool = False, download_stories_only: bool = False) -> None:
|
||||||
"""Download one profile"""
|
"""Download one profile"""
|
||||||
|
|
||||||
# Get profile main page json
|
# Get profile main page json
|
||||||
data = self.get_json(name)
|
profile_metadata = None
|
||||||
|
with suppress(ProfileNotExistsException):
|
||||||
|
# ProfileNotExistsException is raised again later in check_profile_id() when we search the profile, so we
|
||||||
|
# must suppress it.
|
||||||
|
profile_metadata = self.get_profile_metadata(name)
|
||||||
|
|
||||||
# check if profile does exist or name has changed since last download
|
# check if profile does exist or name has changed since last download
|
||||||
# and update name and json data if necessary
|
# and update name and json data if necessary
|
||||||
name_updated, profile_id = self.check_id(name, data)
|
name_updated, profile_id = self.check_profile_id(name, profile_metadata)
|
||||||
if name_updated != name:
|
if name_updated != name:
|
||||||
name = name_updated
|
name = name_updated
|
||||||
data = self.get_json(name)
|
profile_metadata = self.get_profile_metadata(name)
|
||||||
|
|
||||||
# Download profile picture
|
# Download profile picture
|
||||||
try:
|
with self._error_catcher('Download profile picture of {}'.format(name)):
|
||||||
self.download_profilepic(name, data["entry_data"]["ProfilePage"][0]["user"]["profile_pic_url"])
|
self.download_profilepic(name, profile_metadata["user"]["profile_pic_url"])
|
||||||
except NodeUnavailableException:
|
|
||||||
print("Unable to download profilepic of user {}.".format(name), file=sys.stderr)
|
|
||||||
if profile_pic_only:
|
if profile_pic_only:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Catch some errors
|
# Catch some errors
|
||||||
if data["entry_data"]["ProfilePage"][0]["user"]["is_private"]:
|
if profile_metadata["user"]["is_private"]:
|
||||||
if data["config"]["viewer"] is None:
|
if self.username is None:
|
||||||
raise LoginRequiredException("profile %s requires login" % name)
|
raise LoginRequiredException("profile %s requires login" % name)
|
||||||
if not data["entry_data"]["ProfilePage"][0]["user"]["followed_by_viewer"] and \
|
if not profile_metadata["user"]["followed_by_viewer"] and \
|
||||||
self.username != data["entry_data"]["ProfilePage"][0]["user"]["username"]:
|
self.username != profile_metadata["user"]["username"]:
|
||||||
raise PrivateProfileNotFollowedException("Profile %s: private but not followed." % name)
|
raise PrivateProfileNotFollowedException("Profile %s: private but not followed." % name)
|
||||||
else:
|
else:
|
||||||
if data["config"]["viewer"] is not None and not (download_stories or download_stories_only):
|
if self.username is not None and not (download_stories or download_stories_only):
|
||||||
self._log("profile %s could also be downloaded anonymously." % name)
|
self._log("profile %s could also be downloaded anonymously." % name)
|
||||||
|
|
||||||
|
# Download stories, if requested
|
||||||
if download_stories or download_stories_only:
|
if download_stories or download_stories_only:
|
||||||
self.download_stories(userids=[profile_id], filename_target=name,
|
self.download_stories(userids=[profile_id], filename_target=name,
|
||||||
download_videos=download_videos, fast_update=fast_update)
|
download_videos=download_videos, fast_update=fast_update)
|
||||||
if download_stories_only:
|
if download_stories_only:
|
||||||
return
|
return
|
||||||
if ("nodes" not in data["entry_data"]["ProfilePage"][0]["user"]["media"] or
|
|
||||||
not data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]) \
|
if ("nodes" not in profile_metadata["user"]["media"] or
|
||||||
and not profile_pic_only:
|
not profile_metadata["user"]["media"]["nodes"]):
|
||||||
raise ProfileHasNoPicsException("Profile %s: no pics found." % name)
|
raise ProfileHasNoPicsException("Profile %s: no pics found." % name)
|
||||||
|
|
||||||
# Iterate over pictures and download them
|
# Iterate over pictures and download them
|
||||||
self._log("Retrieving posts from profile {}.".format(name))
|
self._log("Retrieving posts from profile {}.".format(name))
|
||||||
def get_last_id(data):
|
totalcount = profile_metadata["user"]["media"]["count"]
|
||||||
if data["entry_data"] and data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
|
|
||||||
return data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"][-1]["id"]
|
|
||||||
|
|
||||||
totalcount = data["entry_data"]["ProfilePage"][0]["user"]["media"]["count"]
|
|
||||||
count = 1
|
count = 1
|
||||||
while get_last_id(data) is not None:
|
for post in self.get_profile_posts(profile_metadata):
|
||||||
for node in data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
|
|
||||||
self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
||||||
count += 1
|
count += 1
|
||||||
try:
|
with self._error_catcher('Download profile {}'.format(name)):
|
||||||
downloaded = self.download_node(node=node, profile=name, target=name,
|
downloaded = self.download_post(node=post, profile=name, target=name,
|
||||||
download_videos=download_videos, geotags=geotags,
|
download_videos=download_videos, geotags=geotags,
|
||||||
download_comments=download_comments)
|
download_comments=download_comments)
|
||||||
except NodeUnavailableException:
|
|
||||||
print("Unable to download node \"https://www.instagram.com/p/{}/\" of user {}."
|
|
||||||
.format(node['shortcode'], name), file=sys.stderr)
|
|
||||||
continue
|
|
||||||
if fast_update and not downloaded:
|
if fast_update and not downloaded:
|
||||||
return
|
break
|
||||||
data = self.get_json(name, max_id=get_last_id(data))
|
|
||||||
|
|
||||||
def interactive_login(self, username: str) -> None:
|
def interactive_login(self, username: str) -> None:
|
||||||
"""Logs in and internally stores session, asking user for password interactively.
|
"""Logs in and internally stores session, asking user for password interactively.
|
||||||
@ -998,7 +1001,7 @@ class Instaloader:
|
|||||||
print(err, file=sys.stderr)
|
print(err, file=sys.stderr)
|
||||||
password = None
|
password = None
|
||||||
|
|
||||||
def download_profiles(self, profilelist: List[str], username: Optional[str] = None, password: Optional[str] = None,
|
def main(self, profilelist: List[str], username: Optional[str] = None, password: Optional[str] = None,
|
||||||
sessionfile: Optional[str] = None, max_count: Optional[int] = None,
|
sessionfile: Optional[str] = None, max_count: Optional[int] = None,
|
||||||
profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
|
profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
|
||||||
download_comments: bool = False,
|
download_comments: bool = False,
|
||||||
@ -1020,19 +1023,20 @@ class Instaloader:
|
|||||||
self.interactive_login(username)
|
self.interactive_login(username)
|
||||||
self._log("Logged in as %s." % username)
|
self._log("Logged in as %s." % username)
|
||||||
# Try block for KeyboardInterrupt (save session on ^C)
|
# Try block for KeyboardInterrupt (save session on ^C)
|
||||||
failedtargets = []
|
|
||||||
targets = set()
|
targets = set()
|
||||||
try:
|
try:
|
||||||
# Generate set of targets
|
# Generate set of targets
|
||||||
for pentry in profilelist:
|
for pentry in profilelist:
|
||||||
if pentry[0] == '#':
|
if pentry[0] == '#':
|
||||||
self._log("Retrieving pictures with hashtag {0}".format(pentry))
|
self._log("Retrieving pictures with hashtag {0}".format(pentry))
|
||||||
|
with self._error_catcher():
|
||||||
self.download_hashtag(hashtag=pentry[1:], max_count=max_count, fast_update=fast_update,
|
self.download_hashtag(hashtag=pentry[1:], max_count=max_count, fast_update=fast_update,
|
||||||
download_videos=download_videos, geotags=geotags,
|
download_videos=download_videos, geotags=geotags,
|
||||||
download_comments=download_comments)
|
download_comments=download_comments)
|
||||||
elif pentry[0] == '@':
|
elif pentry[0] == '@':
|
||||||
if username is not None:
|
if username is not None:
|
||||||
self._log("Retrieving followees of %s..." % pentry[1:])
|
self._log("Retrieving followees of %s..." % pentry[1:])
|
||||||
|
with self._error_catcher():
|
||||||
followees = self.get_followees(pentry[1:])
|
followees = self.get_followees(pentry[1:])
|
||||||
targets.update([followee['username'] for followee in followees])
|
targets.update([followee['username'] for followee in followees])
|
||||||
else:
|
else:
|
||||||
@ -1040,7 +1044,8 @@ class Instaloader:
|
|||||||
elif pentry == ":feed-all":
|
elif pentry == ":feed-all":
|
||||||
if username is not None:
|
if username is not None:
|
||||||
self._log("Retrieving pictures from your feed...")
|
self._log("Retrieving pictures from your feed...")
|
||||||
self.download_feed_pics(fast_update=fast_update, max_count=max_count,
|
with self._error_catcher():
|
||||||
|
self.download_feed_posts(fast_update=fast_update, max_count=max_count,
|
||||||
download_videos=download_videos, geotags=geotags,
|
download_videos=download_videos, geotags=geotags,
|
||||||
download_comments=download_comments)
|
download_comments=download_comments)
|
||||||
else:
|
else:
|
||||||
@ -1048,17 +1053,20 @@ class Instaloader:
|
|||||||
elif pentry == ":feed-liked":
|
elif pentry == ":feed-liked":
|
||||||
if username is not None:
|
if username is not None:
|
||||||
self._log("Retrieving pictures you liked from your feed...")
|
self._log("Retrieving pictures you liked from your feed...")
|
||||||
self.download_feed_pics(fast_update=fast_update, max_count=max_count,
|
def liked_filter(node):
|
||||||
filter_func=lambda node:
|
if "likes" in node:
|
||||||
not node["likes"]["viewer_has_liked"]
|
return not node["likes"]["viewer_has_liked"]
|
||||||
if "likes" in node
|
return not node["viewer_has_liked"]
|
||||||
else not node["viewer_has_liked"],
|
with self._error_catcher():
|
||||||
|
self.download_feed_posts(fast_update=fast_update, max_count=max_count,
|
||||||
|
filter_func=liked_filter,
|
||||||
download_videos=download_videos, geotags=geotags,
|
download_videos=download_videos, geotags=geotags,
|
||||||
download_comments=download_comments)
|
download_comments=download_comments)
|
||||||
else:
|
else:
|
||||||
print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
|
print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
|
||||||
elif pentry == ":stories":
|
elif pentry == ":stories":
|
||||||
if username is not None:
|
if username is not None:
|
||||||
|
with self._error_catcher():
|
||||||
self.download_stories(download_videos=download_videos, fast_update=fast_update)
|
self.download_stories(download_videos=download_videos, fast_update=fast_update)
|
||||||
else:
|
else:
|
||||||
print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
|
print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
|
||||||
@ -1068,9 +1076,9 @@ class Instaloader:
|
|||||||
self._log("Downloading %i profiles..." % len(targets))
|
self._log("Downloading %i profiles..." % len(targets))
|
||||||
# Iterate through targets list and download them
|
# Iterate through targets list and download them
|
||||||
for target in targets:
|
for target in targets:
|
||||||
|
with self._error_catcher():
|
||||||
try:
|
try:
|
||||||
try:
|
self.download_profile(target, profile_pic_only, download_videos,
|
||||||
self.download(target, profile_pic_only, download_videos,
|
|
||||||
geotags, download_comments, fast_update, stories, stories_only)
|
geotags, download_comments, fast_update, stories, stories_only)
|
||||||
except ProfileNotExistsException as err:
|
except ProfileNotExistsException as err:
|
||||||
if username is not None:
|
if username is not None:
|
||||||
@ -1078,21 +1086,21 @@ class Instaloader:
|
|||||||
self._log("Trying again anonymously, helps in case you are just blocked.")
|
self._log("Trying again anonymously, helps in case you are just blocked.")
|
||||||
anonymous_loader = Instaloader(self.sleep, self.quiet, self.shorter_output,
|
anonymous_loader = Instaloader(self.sleep, self.quiet, self.shorter_output,
|
||||||
self.user_agent, self.dirname_pattern, self.filename_pattern)
|
self.user_agent, self.dirname_pattern, self.filename_pattern)
|
||||||
anonymous_loader.download(target, profile_pic_only, download_videos,
|
anonymous_loader.error_log = self.error_log
|
||||||
|
with self._error_catcher():
|
||||||
|
anonymous_loader.download_profile(target, profile_pic_only, download_videos,
|
||||||
geotags, download_comments, fast_update)
|
geotags, download_comments, fast_update)
|
||||||
else:
|
else:
|
||||||
raise err
|
raise err
|
||||||
except NonfatalException as err:
|
|
||||||
failedtargets.append(target)
|
|
||||||
print(err, file=sys.stderr)
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print("\nInterrupted by user.", file=sys.stderr)
|
print("\nInterrupted by user.", file=sys.stderr)
|
||||||
if len(targets) > 1 and failedtargets:
|
|
||||||
print("Errors occured (see above) while downloading profiles: %s." %
|
|
||||||
", ".join(failedtargets), file=sys.stderr)
|
|
||||||
# Save session if it is useful
|
# Save session if it is useful
|
||||||
if username is not None:
|
if username is not None:
|
||||||
self.save_session_to_file(sessionfile)
|
self.save_session_to_file(sessionfile)
|
||||||
|
if self.error_log:
|
||||||
|
print("\nErrors occured:", file=sys.stderr)
|
||||||
|
for err in self.error_log:
|
||||||
|
print(err, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -1194,7 +1202,7 @@ def main():
|
|||||||
loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, shorter_output=args.shorter_output,
|
loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, shorter_output=args.shorter_output,
|
||||||
user_agent=args.user_agent,
|
user_agent=args.user_agent,
|
||||||
dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern)
|
dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern)
|
||||||
loader.download_profiles(args.profile, args.login.lower() if args.login is not None else None, args.password,
|
loader.main(args.profile, args.login.lower() if args.login is not None else None, args.password,
|
||||||
args.sessionfile,
|
args.sessionfile,
|
||||||
int(args.count) if args.count is not None else None,
|
int(args.count) if args.count is not None else None,
|
||||||
args.profile_pic_only, not args.skip_videos, args.geotags, args.comments,
|
args.profile_pic_only, not args.skip_videos, args.geotags, args.comments,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user