304 lines
18 KiB
Python
304 lines
18 KiB
Python
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
|
|
|
|
import ast
|
|
import os
|
|
import sys
|
|
from argparse import ArgumentParser, SUPPRESS
|
|
from typing import Callable, List, Optional
|
|
|
|
from . import (Instaloader, InstaloaderException, InvalidArgumentException, Post, Profile, ProfileNotExistsException,
|
|
Tristate, __version__)
|
|
from .instaloader import get_default_session_filename
|
|
from .instaloadercontext import default_user_agent
|
|
|
|
|
|
def usage_string():
    """Return the multi-line usage synopsis for the command line.

    The program name is the basename of ``sys.argv[0]``; when that is ``__main__.py`` the name
    ``instaloader`` is shown instead. Continuation lines are padded with as many spaces as the
    program name is long, so the option columns line up.
    """
    # NOTE: duplicated in README.rst and docs/index.rst
    program = os.path.basename(sys.argv[0])
    if program == "__main__.py":
        program = "instaloader"
    # {0}: program name, {1}: width of the padding, {2}: empty string that gets padded to that width
    template = """
{0} [--comments] [--geotags] [--stories]
{2:{1}} [--login YOUR-USERNAME] [--fast-update]
{2:{1}} profile | "#hashtag" | :stories | :feed | :saved
{0} --help"""
    return template.format(program, len(program), '')
|
|
|
|
|
|
def filterstr_to_filterfunc(filter_str: str, logged_in: bool) -> Callable[['Post'], bool]:
    """Turn an --only-if=... filter specification into a predicate over posts.

    The expression is parsed in ``eval`` mode, every name occurring in it is rewritten into an
    attribute load on ``post`` (``viewer_has_liked`` becomes ``post.viewer_has_liked``), and the
    rewritten tree is compiled once. The returned function evaluates that code object with the
    given post bound to ``post`` and converts the result with ``bool()``.

    :param filter_str: The filter expression, a Python expression.
    :param logged_in: Whether a login is in effect; names listed in
       ``Post.LOGIN_REQUIRING_PROPERTIES`` are rejected when this is False.
    :return: Function taking a post and returning whether it passes the filter.
    :raises InvalidArgumentException: If a name is used in a non-load context, is not an attribute
       of ``Post``, or requires being logged in while ``logged_in`` is False.
    """
    source_label = '<--only-if parameter>'

    class NamesToPostAttributes(ast.NodeTransformer):
        """Validates each ``Name`` node and replaces it by ``post.<name>``."""

        def visit_Name(self, node: ast.Name):
            # pylint:disable=no-self-use
            name = node.id
            if not isinstance(node.ctx, ast.Load):
                raise InvalidArgumentException("Invalid filter: Modifying variables ({}) not allowed.".format(name))
            if not hasattr(Post, name):
                raise InvalidArgumentException("Invalid filter: Name {} is not defined.".format(name))
            if name in Post.LOGIN_REQUIRING_PROPERTIES and not logged_in:
                raise InvalidArgumentException("Invalid filter: Name {} requires being logged in.".format(name))
            # Every synthesized node takes its source location from the name it replaces.
            post_ref = ast.copy_location(ast.Name('post', ast.Load()), node)
            load_ctx = ast.copy_location(ast.Load(), node)
            attribute = ast.Attribute(post_ref, name, load_ctx)
            return ast.copy_location(attribute, node)

    parsed = ast.parse(filter_str, filename=source_label, mode='eval')
    rewritten = NamesToPostAttributes().visit(parsed)
    code = compile(rewritten, filename=source_label, mode='eval')

    def filterfunc(post: 'Post') -> bool:
        # pylint:disable=eval-used
        namespace = {'post': post}
        return bool(eval(code, namespace))

    return filterfunc
|
|
|
|
|
|
def _main(instaloader: Instaloader, targetlist: List[str],
          username: Optional[str] = None, password: Optional[str] = None,
          sessionfile: Optional[str] = None, max_count: Optional[int] = None,
          profile_pic: bool = True, profile_pic_only: bool = False,
          fast_update: bool = False,
          stories: bool = False, stories_only: bool = False,
          filter_str: Optional[str] = None) -> None:
    """Download set of profiles, hashtags etc. and handle logging in and session files if desired.

    :param instaloader: Loader used for every login, session and download call.
    :param targetlist: Targets to process: profile names, ``#hashtag``, ``@profile`` (followees of
       that profile), ``:feed``, ``:stories`` or ``:saved``. Trailing ``/`` characters are stripped.
    :param username: Login name; if None, no login is attempted.
    :param password: Password used for logging in; if None, an interactive login is used when a
       login is needed.
    :param sessionfile: Path handed to the loader when loading and saving the session.
    :param max_count: Passed as ``max_count`` to the hashtag, feed and saved-posts downloads.
    :param profile_pic: Passed through to ``download_profile``.
    :param profile_pic_only: Passed through to ``download_profile``.
    :param fast_update: Passed through to every download call.
    :param stories: Passed through to ``download_profile`` (not to the anonymous retry).
    :param stories_only: Passed through to ``download_profile`` (not to the anonymous retry).
    :param filter_str: ``--only-if`` expression; if given it is compiled into a filter function
       that is passed to the post-downloading calls.
    """
    # Parse and generate filter function
    if filter_str is not None:
        filter_func = filterstr_to_filterfunc(filter_str, username is not None)
        instaloader.context.log('Only download posts with property "{}".'.format(filter_str))
    else:
        filter_func = None

    # Login, if desired
    if username is not None:
        try:
            instaloader.load_session_from_file(username, sessionfile)
        except FileNotFoundError as err:
            # Only complain about the missing file if the user explicitly named one.
            if sessionfile is not None:
                print(err, file=sys.stderr)
            instaloader.context.log("Session file does not exist yet - Logging in.")
        if not instaloader.context.is_logged_in or username != instaloader.test_login():
            if password is not None:
                instaloader.login(username, password)
            else:
                instaloader.interactive_login(username)
        instaloader.context.log("Logged in as %s." % username)

    # Try block for KeyboardInterrupt (save session on ^C)
    profiles = set()
    try:
        # Generate set of profiles, already downloading non-profile targets
        for target in targetlist:
            # strip '/' characters to be more shell-autocompletion-friendly
            target = target.rstrip('/')
            if not target:
                # A target such as "/" is empty after stripping; indexing it would raise IndexError.
                print("Ignoring empty target.", file=sys.stderr)
                continue
            with instaloader.context.error_catcher(target):
                if target.startswith('@'):
                    instaloader.context.log("Retrieving followees of %s..." % target[1:])
                    profile = Profile.from_username(instaloader.context, target[1:])
                    followees = profile.get_followees()
                    profiles.update([followee['username'] for followee in followees])
                elif target.startswith('#'):
                    instaloader.download_hashtag(hashtag=target[1:], max_count=max_count, fast_update=fast_update,
                                                 filter_func=filter_func)
                elif target == ":feed":
                    instaloader.download_feed_posts(fast_update=fast_update, max_count=max_count,
                                                    filter_func=filter_func)
                elif target == ":stories":
                    instaloader.download_stories(fast_update=fast_update)
                elif target == ":saved":
                    instaloader.download_saved_posts(fast_update=fast_update, max_count=max_count,
                                                     filter_func=filter_func)
                else:
                    profiles.add(target)
        if len(profiles) > 1:
            instaloader.context.log("Downloading {} profiles: {}".format(len(profiles), ','.join(profiles)))
        # Iterate through profiles list and download them
        for target in profiles:
            with instaloader.context.error_catcher(target):
                try:
                    instaloader.download_profile(target, profile_pic, profile_pic_only, fast_update,
                                                 stories, stories_only, filter_func=filter_func)
                except ProfileNotExistsException as err:
                    # An anonymous retry can only make a difference when the failed attempt was made
                    # with a logged-in session; without a login it would repeat the same request.
                    if instaloader.context.is_logged_in:
                        instaloader.context.log(err)
                        instaloader.context.log("Trying again anonymously, helps in case you are just blocked.")
                        with instaloader.anonymous_copy() as anonymous_loader:
                            with instaloader.context.error_catcher():
                                anonymous_loader.download_profile(target, profile_pic, profile_pic_only,
                                                                  fast_update, filter_func=filter_func)
                    else:
                        raise
    except KeyboardInterrupt:
        print("\nInterrupted by user.", file=sys.stderr)

    # Save session if it is useful
    if instaloader.context.is_logged_in:
        instaloader.save_session_to_file(sessionfile)

    # User might be confused if Instaloader does nothing
    if not targetlist:
        if instaloader.context.is_logged_in:
            # Instaloader did at least save a session file
            instaloader.context.log("No targets were specified, thus nothing has been downloaded.")
        else:
            # Instaloader did not do anything
            instaloader.context.log("usage:" + usage_string())
|
|
|
|
|
|
def main():
    """Command-line entry point: parse the arguments, build the loader and run ``_main``.

    Argument combinations that cannot work (stories without ``--login``, the removed ``:feed-all``
    and ``:feed-liked`` targets, ``--geotags`` together with ``--no-geotags``) are rejected before
    the loader is created.

    :raises SystemExit: On rejected argument combinations, and with a ``Fatal error: ...`` message
       when an :class:`InstaloaderException` propagates out of the loader or ``_main``.
    """
    parser = ArgumentParser(description=__doc__, add_help=False, usage=usage_string(),
                            epilog="Report issues at https://github.com/instaloader/instaloader/issues. "
                                   "The complete documentation can be found at "
                                   "https://instaloader.github.io/.")

    g_what = parser.add_argument_group('What to Download',
                                       'Specify a list of profiles or #hashtags. For each of these, Instaloader '
                                       'creates a folder and '
                                       'downloads all posts along with the pictures\'s '
                                       'captions and the current profile picture. '
                                       'If an already-downloaded profile has been renamed, Instaloader automatically '
                                       'finds it by its unique ID and renames the folder likewise.')
    g_what.add_argument('profile', nargs='*', metavar='profile|#hashtag',
                        help='Name of profile or #hashtag to download. '
                             'Alternatively, if --login is given: @<profile> to download all followees of '
                             '<profile>; the special targets '
                             ':feed to download pictures from your feed; '
                             ':stories to download the stories of your followees; or '
                             ':saved to download the posts marked as saved.')
    g_what.add_argument('-P', '--profile-pic-only', action='store_true',
                        help='Only download profile picture.')
    g_what.add_argument('--no-profile-pic', action='store_true',
                        help='Do not download profile picture.')
    g_what.add_argument('-V', '--no-videos', action='store_true',
                        help='Do not download videos.')
    g_what.add_argument('--no-video-thumbnails', action='store_true',
                        help='Do not download thumbnails of videos.')
    g_what.add_argument('-G', '--geotags', action='store_true',
                        help='Download geotags when available. Geotags are stored as a '
                             'text file with the location\'s name and a Google Maps link. '
                             'This requires an additional request to the Instagram '
                             'server for each picture, which is why it is disabled by default.')
    g_what.add_argument('--no-geotags', action='store_true',
                        help='Do not store geotags, even if they can be obtained without any additional request.')
    g_what.add_argument('-C', '--comments', action='store_true',
                        help='Download and update comments for each post. '
                             'This requires an additional request to the Instagram '
                             'server for each post, which is why it is disabled by default.')
    g_what.add_argument('--no-captions', action='store_true',
                        help='Do not store media captions, although no additional request is needed to obtain them.')
    g_what.add_argument('--metadata-json', action='store_true',
                        help='Create a JSON file containing the metadata of each post. This does not include comments '
                             'nor geotags.')
    g_what.add_argument('-s', '--stories', action='store_true',
                        help='Also download stories of each profile that is downloaded. Requires --login.')
    g_what.add_argument('--stories-only', action='store_true',
                        help='Rather than downloading regular posts of each specified profile, only download '
                             'stories. Requires --login. Does not imply --no-profile-pic.')
    g_what.add_argument('--only-if', metavar='filter',
                        help='Expression that, if given, must evaluate to True for each post to be downloaded. Must be '
                             'a syntactically valid python expression. Variables are evaluated to '
                             'instaloader.Post attributes. Example: --only-if=viewer_has_liked.')

    g_stop = parser.add_argument_group('When to Stop Downloading',
                                       'If none of these options are given, Instaloader goes through all pictures '
                                       'matching the specified targets.')
    g_stop.add_argument('-F', '--fast-update', action='store_true',
                        help='For each target, stop when encountering the first already-downloaded picture. This '
                             'flag is recommended when you use Instaloader to update your personal Instagram archive.')
    # type=int lets argparse reject a non-numeric COUNT with a usage error instead of a traceback.
    g_stop.add_argument('-c', '--count', type=int,
                        help='Do not attempt to download more than COUNT posts. '
                             'Applies only to #hashtag, :feed and :saved.')

    g_login = parser.add_argument_group('Login (Download Private Profiles)',
                                        'Instaloader can login to Instagram. This allows downloading private profiles. '
                                        'To login, pass the --login option. Your session cookie (not your password!) '
                                        'will be saved to a local file to be reused next time you want Instaloader '
                                        'to login.')
    g_login.add_argument('-l', '--login', metavar='YOUR-USERNAME',
                         help='Login name (profile name) for your Instagram account.')
    g_login.add_argument('-f', '--sessionfile',
                         help='Path for loading and storing session key file. '
                              'Defaults to ' + get_default_session_filename("<login_name>"))
    g_login.add_argument('-p', '--password', metavar='YOUR-PASSWORD',
                         help='Password for your Instagram account. Without this option, '
                              'you\'ll be prompted for your password interactively if '
                              'there is not yet a valid session file.')

    g_how = parser.add_argument_group('How to Download')
    g_how.add_argument('--dirname-pattern',
                       help='Name of directory where to store posts. {profile} is replaced by the profile name, '
                            '{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the '
                            'profile name. Defaults to \'{target}\'.')
    g_how.add_argument('--filename-pattern',
                       help='Prefix of filenames. Posts are stored in the directory whose pattern is given with '
                            '--dirname-pattern. {profile} is replaced by the profile name, '
                            '{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the '
                            'profile name. Also, the fields {date} and {shortcode} can be specified. In case of not '
                            'downloading stories, the attributes of the Post class can be used in addition, e.g. '
                            '{post.owner_id} or {post.mediaid}. Defaults to \'{date:%%Y-%%m-%%d_%%H-%%M-%%S}\'.')
    g_how.add_argument('--user-agent',
                       help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
    g_how.add_argument('-S', '--no-sleep', action='store_true', help=SUPPRESS)
    g_how.add_argument('--max-connection-attempts', metavar='N', type=int, default=3,
                       help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a '
                            'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry '
                            'infinitely.')

    g_misc = parser.add_argument_group('Miscellaneous Options')
    g_misc.add_argument('-q', '--quiet', action='store_true',
                        help='Disable user interaction, i.e. do not print messages (except errors) and fail '
                             'if login credentials are needed but not given. This makes Instaloader suitable as a '
                             'cron job.')
    g_misc.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
    g_misc.add_argument('--version', action='version', help='Show version number and exit.',
                        version=__version__)

    args = parser.parse_args()
    try:
        if args.login is None and (args.stories or args.stories_only):
            print("--login=USERNAME required to download stories.", file=sys.stderr)
            # --stories merely gets dropped; --stories-only leaves nothing to do, so give up.
            args.stories = False
            if args.stories_only:
                raise SystemExit(1)

        if ':feed-all' in args.profile or ':feed-liked' in args.profile:
            raise SystemExit(":feed-all and :feed-liked were removed. Use :feed as target and "
                             "eventually --only-if=viewer_has_liked.")

        download_videos = Tristate.always if not args.no_videos else Tristate.no_extra_query
        download_video_thumbnails = Tristate.always if not args.no_video_thumbnails else Tristate.never
        download_comments = Tristate.always if args.comments else Tristate.no_extra_query
        save_captions = Tristate.no_extra_query if not args.no_captions else Tristate.never
        save_metadata = Tristate.always if args.metadata_json else Tristate.never

        if args.geotags and args.no_geotags:
            raise SystemExit("--geotags and --no-geotags given. I am confused and refuse to work.")
        elif args.geotags:
            download_geotags = Tristate.always
        elif args.no_geotags:
            download_geotags = Tristate.never
        else:
            download_geotags = Tristate.no_extra_query

        loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet,
                             user_agent=args.user_agent,
                             dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern,
                             download_videos=download_videos, download_video_thumbnails=download_video_thumbnails,
                             download_geotags=download_geotags,
                             save_captions=save_captions, download_comments=download_comments,
                             save_metadata=save_metadata, max_connection_attempts=args.max_connection_attempts)
        try:
            _main(loader,
                  args.profile,
                  username=args.login.lower() if args.login is not None else None,
                  password=args.password,
                  sessionfile=args.sessionfile,
                  max_count=args.count,
                  profile_pic=not args.no_profile_pic,
                  profile_pic_only=args.profile_pic_only,
                  fast_update=args.fast_update,
                  stories=args.stories,
                  stories_only=args.stories_only,
                  filter_str=args.only_if)
        finally:
            # Close the loader even when _main raises, so it is not left open on the error path.
            loader.close()
    except InstaloaderException as err:
        raise SystemExit("Fatal error: %s" % err)
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the command-line interface only when this module is executed, not when it is imported.
    main()
|