
The commit mode ensures pictures are not corrupted when Instaloader is unexpectedly interrupted. In the case that the last picture is corrupted because of an interruption, Instaloader will redownload the picture. Since the metadata is the last object saved to disk, we can consider a post as "committed" if its json metadata file exists and is not malformed. Instaloader should download any posts which are not committed. Downside is commit mode requires metadata to be saved.
404 lines
25 KiB
Python
404 lines
25 KiB
Python
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
|
|
|
|
import ast
|
|
import datetime
|
|
import os
|
|
import sys
|
|
from argparse import ArgumentParser, SUPPRESS
|
|
from typing import List, Optional
|
|
|
|
from . import (Instaloader, InstaloaderException, InvalidArgumentException, Post, Profile, ProfileNotExistsException,
|
|
StoryItem, __version__, load_structure_from_file)
|
|
from .instaloader import get_default_session_filename
|
|
from .instaloadercontext import default_user_agent
|
|
|
|
|
|
def usage_string():
|
|
# NOTE: duplicated in README.rst and docs/index.rst
|
|
argv0 = os.path.basename(sys.argv[0])
|
|
argv0 = "instaloader" if argv0 == "__main__.py" else argv0
|
|
return """
|
|
{0} [--comments] [--geotags] [--stories] [--highlights] [--tagged]
|
|
{2:{1}} [--login YOUR-USERNAME] [--fast-update]
|
|
{2:{1}} profile | "#hashtag" | :stories | :feed | :saved
|
|
{0} --help""".format(argv0, len(argv0), '')
|
|
|
|
|
|
def filterstr_to_filterfunc(filter_str: str, item_type: type):
|
|
"""Takes an --post-filter=... or --storyitem-filter=... filter
|
|
specification and makes a filter_func Callable out of it."""
|
|
|
|
# The filter_str is parsed, then all names occurring in its AST are replaced by loads to post.<name>. A
|
|
# function Post->bool is returned which evaluates the filter with the post as 'post' in its namespace.
|
|
|
|
class TransformFilterAst(ast.NodeTransformer):
|
|
def visit_Name(self, node: ast.Name):
|
|
# pylint:disable=no-self-use
|
|
if not isinstance(node.ctx, ast.Load):
|
|
raise InvalidArgumentException("Invalid filter: Modifying variables ({}) not allowed.".format(node.id))
|
|
if node.id == "datetime":
|
|
return node
|
|
if not hasattr(item_type, node.id):
|
|
raise InvalidArgumentException("Invalid filter: {} not a {} attribute.".format(node.id,
|
|
item_type.__name__))
|
|
new_node = ast.Attribute(ast.copy_location(ast.Name('item', ast.Load()), node), node.id,
|
|
ast.copy_location(ast.Load(), node))
|
|
return ast.copy_location(new_node, node)
|
|
|
|
input_filename = '<command line filter parameter>'
|
|
compiled_filter = compile(TransformFilterAst().visit(ast.parse(filter_str, filename=input_filename, mode='eval')),
|
|
filename=input_filename, mode='eval')
|
|
|
|
def filterfunc(item) -> bool:
|
|
# pylint:disable=eval-used
|
|
return bool(eval(compiled_filter, {'item': item, 'datetime': datetime.datetime}))
|
|
|
|
return filterfunc
|
|
|
|
|
|
def _main(instaloader: Instaloader, targetlist: List[str],
|
|
username: Optional[str] = None, password: Optional[str] = None,
|
|
sessionfile: Optional[str] = None,
|
|
download_profile_pic: bool = True, download_posts=True,
|
|
download_stories: bool = False, download_highlights: bool = False, download_tagged: bool = False,
|
|
fast_update: bool = False,
|
|
max_count: Optional[int] = None, post_filter_str: Optional[str] = None,
|
|
storyitem_filter_str: Optional[str] = None) -> None:
|
|
"""Download set of profiles, hashtags etc. and handle logging in and session files if desired."""
|
|
# Parse and generate filter function
|
|
post_filter = None
|
|
if post_filter_str is not None:
|
|
post_filter = filterstr_to_filterfunc(post_filter_str, Post)
|
|
instaloader.context.log('Only download posts with property "{}".'.format(post_filter_str))
|
|
storyitem_filter = None
|
|
if storyitem_filter_str is not None:
|
|
storyitem_filter = filterstr_to_filterfunc(storyitem_filter_str, StoryItem)
|
|
instaloader.context.log('Only download storyitems with property "{}".'.format(storyitem_filter_str))
|
|
# Login, if desired
|
|
if username is not None:
|
|
try:
|
|
instaloader.load_session_from_file(username, sessionfile)
|
|
except FileNotFoundError as err:
|
|
if sessionfile is not None:
|
|
print(err, file=sys.stderr)
|
|
instaloader.context.log("Session file does not exist yet - Logging in.")
|
|
if not instaloader.context.is_logged_in or username != instaloader.test_login():
|
|
if password is not None:
|
|
instaloader.login(username, password)
|
|
else:
|
|
instaloader.interactive_login(username)
|
|
instaloader.context.log("Logged in as %s." % username)
|
|
# Try block for KeyboardInterrupt (save session on ^C)
|
|
profiles = set()
|
|
anonymous_retry_profiles = set()
|
|
try:
|
|
# Generate set of profiles, already downloading non-profile targets
|
|
for target in targetlist:
|
|
if (target.endswith('.json') or target.endswith('.json.xz')) and os.path.isfile(target):
|
|
with instaloader.context.error_catcher(target):
|
|
structure = load_structure_from_file(instaloader.context, target)
|
|
if isinstance(structure, Post):
|
|
if post_filter is not None and not post_filter(structure):
|
|
instaloader.context.log("<{} ({}) skipped>".format(structure, target), flush=True)
|
|
continue
|
|
instaloader.context.log("Downloading {} ({})".format(structure, target))
|
|
instaloader.download_post(structure, os.path.dirname(target))
|
|
elif isinstance(structure, StoryItem):
|
|
if storyitem_filter is not None and not storyitem_filter(structure):
|
|
instaloader.context.log("<{} ({}) skipped>".format(structure, target), flush=True)
|
|
continue
|
|
instaloader.context.log("Attempting to download {} ({})".format(structure, target))
|
|
instaloader.download_storyitem(structure, os.path.dirname(target))
|
|
elif isinstance(structure, Profile):
|
|
raise InvalidArgumentException("Profile JSON are ignored. Pass \"{}\" to download that profile"
|
|
.format(structure.username))
|
|
else:
|
|
raise InvalidArgumentException("{} JSON file not supported as target"
|
|
.format(structure.__class__.__name__))
|
|
continue
|
|
# strip '/' characters to be more shell-autocompletion-friendly
|
|
target = target.rstrip('/')
|
|
with instaloader.context.error_catcher(target):
|
|
if target[0] == '@':
|
|
instaloader.context.log("Retrieving followees of %s..." % target[1:])
|
|
profile = Profile.from_username(instaloader.context, target[1:])
|
|
for followee in profile.get_followees():
|
|
instaloader.save_profile_id(followee)
|
|
profiles.add(followee)
|
|
elif target[0] == '#':
|
|
instaloader.download_hashtag(hashtag=target[1:], max_count=max_count, fast_update=fast_update,
|
|
post_filter=post_filter)
|
|
elif target[0] == '-':
|
|
instaloader.download_post(Post.from_shortcode(instaloader.context, target[1:]), target)
|
|
elif target == ":feed":
|
|
instaloader.download_feed_posts(fast_update=fast_update, max_count=max_count,
|
|
post_filter=post_filter)
|
|
elif target == ":stories":
|
|
instaloader.download_stories(fast_update=fast_update, storyitem_filter=storyitem_filter)
|
|
elif target == ":saved":
|
|
instaloader.download_saved_posts(fast_update=fast_update, max_count=max_count,
|
|
post_filter=post_filter)
|
|
else:
|
|
try:
|
|
profile = instaloader.check_profile_id(target)
|
|
if instaloader.context.is_logged_in and profile.has_blocked_viewer:
|
|
if download_profile_pic or ((download_posts or download_tagged) and not profile.is_private):
|
|
raise ProfileNotExistsException("{} blocked you; But we download her anonymously."
|
|
.format(target))
|
|
else:
|
|
instaloader.context.error("{} blocked you.".format(target))
|
|
else:
|
|
profiles.add(profile)
|
|
except ProfileNotExistsException as err:
|
|
# Not only our profile.has_blocked_viewer condition raises ProfileNotExistsException,
|
|
# check_profile_id() also does, since access to blocked profile may be responded with 404.
|
|
if instaloader.context.is_logged_in and (download_profile_pic or download_posts or
|
|
download_tagged):
|
|
instaloader.context.log(err)
|
|
instaloader.context.log("Trying again anonymously, helps in case you are just blocked.")
|
|
with instaloader.anonymous_copy() as anonymous_loader:
|
|
with instaloader.context.error_catcher():
|
|
anonymous_retry_profiles.add(anonymous_loader.check_profile_id(target))
|
|
instaloader.context.error("Warning: {} will be downloaded anonymously (\"{}\")."
|
|
.format(target, err))
|
|
else:
|
|
raise
|
|
if len(profiles) > 1:
|
|
instaloader.context.log("Downloading {} profiles: {}".format(len(profiles),
|
|
' '.join([p.username for p in profiles])))
|
|
instaloader.download_profiles(profiles,
|
|
download_profile_pic, download_posts, download_tagged, download_highlights,
|
|
download_stories, fast_update, post_filter, storyitem_filter)
|
|
if anonymous_retry_profiles:
|
|
instaloader.context.log("Downloading anonymously: {}"
|
|
.format(' '.join([p.username for p in anonymous_retry_profiles])))
|
|
with instaloader.anonymous_copy() as anonymous_loader:
|
|
anonymous_loader.download_profiles(anonymous_retry_profiles,
|
|
download_profile_pic, download_posts, download_tagged,
|
|
fast_update=fast_update, post_filter=post_filter)
|
|
except KeyboardInterrupt:
|
|
print("\nInterrupted by user.", file=sys.stderr)
|
|
# Save session if it is useful
|
|
if instaloader.context.is_logged_in:
|
|
instaloader.save_session_to_file(sessionfile)
|
|
# User might be confused if Instaloader does nothing
|
|
if not targetlist:
|
|
if instaloader.context.is_logged_in:
|
|
# Instaloader did at least save a session file
|
|
instaloader.context.log("No targets were specified, thus nothing has been downloaded.")
|
|
else:
|
|
# Instloader did not do anything
|
|
instaloader.context.log("usage:" + usage_string())
|
|
|
|
|
|
def main():
|
|
parser = ArgumentParser(description=__doc__, add_help=False, usage=usage_string(),
|
|
epilog="Report issues at https://github.com/instaloader/instaloader/issues. "
|
|
"The complete documentation can be found at "
|
|
"https://instaloader.github.io/.",
|
|
fromfile_prefix_chars='+')
|
|
|
|
g_targets = parser.add_argument_group("What to Download",
|
|
"Specify a list of targets. For each of these, Instaloader creates a folder "
|
|
"and downloads all posts. The following targets are supported:")
|
|
g_targets.add_argument('profile', nargs='*',
|
|
help="Download profile. If an already-downloaded profile has been renamed, Instaloader "
|
|
"automatically finds it by its unique ID and renames the folder likewise.")
|
|
g_targets.add_argument('_at_profile', nargs='*', metavar="@profile",
|
|
help="Download all followees of profile. Requires --login. "
|
|
"Consider using :feed rather than @yourself.")
|
|
g_targets.add_argument('_hashtag', nargs='*', metavar='"#hashtag"', help="Download #hashtag.")
|
|
g_targets.add_argument('_feed', nargs='*', metavar=":feed",
|
|
help="Download pictures from your feed. Requires --login.")
|
|
g_targets.add_argument('_stories', nargs='*', metavar=":stories",
|
|
help="Download the stories of your followees. Requires --login.")
|
|
g_targets.add_argument('_saved', nargs='*', metavar=":saved",
|
|
help="Download the posts that you marked as saved. Requires --login.")
|
|
g_targets.add_argument('_singlepost', nargs='*', metavar="-- -shortcode",
|
|
help="Download the post with the given shortcode")
|
|
g_targets.add_argument('_json', nargs='*', metavar="filename.json[.xz]",
|
|
help="Re-Download the given object.")
|
|
g_targets.add_argument('_fromfile', nargs='*', metavar="+args.txt",
|
|
help="Read targets (and options) from given textfile.")
|
|
|
|
g_post = parser.add_argument_group("What to Download of each Post")
|
|
|
|
g_prof = parser.add_argument_group("What to Download of each Profile")
|
|
|
|
g_prof.add_argument('-P', '--profile-pic-only', action='store_true',
|
|
help=SUPPRESS)
|
|
g_prof.add_argument('--no-posts', action='store_true',
|
|
help="Do not download regular posts.")
|
|
g_prof.add_argument('--no-profile-pic', action='store_true',
|
|
help='Do not download profile picture.')
|
|
g_post.add_argument('--no-pictures', action='store_true',
|
|
help='Do not download post pictures. Cannot be used together with --fast-update. '
|
|
'Implies --no-video-thumbnails, does not imply --no-videos.')
|
|
g_post.add_argument('-V', '--no-videos', action='store_true',
|
|
help='Do not download videos.')
|
|
g_post.add_argument('--no-video-thumbnails', action='store_true',
|
|
help='Do not download thumbnails of videos.')
|
|
g_post.add_argument('-G', '--geotags', action='store_true',
|
|
help='Download geotags when available. Geotags are stored as a '
|
|
'text file with the location\'s name and a Google Maps link. '
|
|
'This requires an additional request to the Instagram '
|
|
'server for each picture, which is why it is disabled by default.')
|
|
g_post.add_argument('-C', '--comments', action='store_true',
|
|
help='Download and update comments for each post. '
|
|
'This requires an additional request to the Instagram '
|
|
'server for each post, which is why it is disabled by default.')
|
|
g_post.add_argument('--no-captions', action='store_true',
|
|
help='Do not create txt files.')
|
|
g_post.add_argument('--post-metadata-txt', action='append',
|
|
help='Template to write in txt file for each Post.')
|
|
g_post.add_argument('--storyitem-metadata-txt', action='append',
|
|
help='Template to write in txt file for each StoryItem.')
|
|
g_post.add_argument('--no-metadata-json', action='store_true',
|
|
help='Do not create a JSON file containing the metadata of each post.')
|
|
g_post.add_argument('--metadata-json', action='store_true',
|
|
help=SUPPRESS)
|
|
g_post.add_argument('--no-compress-json', action='store_true',
|
|
help='Do not xz compress JSON files, rather create pretty formatted JSONs.')
|
|
g_prof.add_argument('-s', '--stories', action='store_true',
|
|
help='Also download stories of each profile that is downloaded. Requires --login.')
|
|
g_prof.add_argument('--stories-only', action='store_true',
|
|
help=SUPPRESS)
|
|
g_prof.add_argument('--highlights', action='store_true',
|
|
help='Also download highlights of each profile that is downloaded. Requires --login.')
|
|
g_prof.add_argument('--tagged', action='store_true',
|
|
help='Also download posts where each profile is tagged.')
|
|
|
|
g_cond = parser.add_argument_group("Which Posts to Download")
|
|
|
|
g_cond.add_argument('-F', '--fast-update', action='store_true',
|
|
help='For each target, stop when encountering the first already-downloaded picture. This '
|
|
'flag is recommended when you use Instaloader to update your personal Instagram archive.')
|
|
|
|
g_cond.add_argument('--post-filter', '--only-if', metavar='filter',
|
|
help='Expression that, if given, must evaluate to True for each post to be downloaded. Must be '
|
|
'a syntactically valid python expression. Variables are evaluated to '
|
|
'instaloader.Post attributes. Example: --post-filter=viewer_has_liked.')
|
|
g_cond.add_argument('--storyitem-filter', metavar='filter',
|
|
help='Expression that, if given, must evaluate to True for each storyitem to be downloaded. '
|
|
'Must be a syntactically valid python expression. Variables are evaluated to '
|
|
'instaloader.StoryItem attributes.')
|
|
|
|
g_cond.add_argument('-c', '--count',
|
|
help='Do not attempt to download more than COUNT posts. '
|
|
'Applies only to #hashtag and :feed.')
|
|
|
|
g_login = parser.add_argument_group('Login (Download Private Profiles)',
|
|
'Instaloader can login to Instagram. This allows downloading private profiles. '
|
|
'To login, pass the --login option. Your session cookie (not your password!) '
|
|
'will be saved to a local file to be reused next time you want Instaloader '
|
|
'to login.')
|
|
g_login.add_argument('-l', '--login', metavar='YOUR-USERNAME',
|
|
help='Login name (profile name) for your Instagram account.')
|
|
g_login.add_argument('-f', '--sessionfile',
|
|
help='Path for loading and storing session key file. '
|
|
'Defaults to ' + get_default_session_filename("<login_name>"))
|
|
g_login.add_argument('-p', '--password', metavar='YOUR-PASSWORD',
|
|
help='Password for your Instagram account. Without this option, '
|
|
'you\'ll be prompted for your password interactively if '
|
|
'there is not yet a valid session file.')
|
|
|
|
g_how = parser.add_argument_group('How to Download')
|
|
g_how.add_argument('--dirname-pattern',
|
|
help='Name of directory where to store posts. {profile} is replaced by the profile name, '
|
|
'{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the '
|
|
'profile name. Defaults to \'{target}\'.')
|
|
g_how.add_argument('--filename-pattern',
|
|
help='Prefix of filenames, relative to the directory given with '
|
|
'--dirname-pattern. {profile} is replaced by the profile name,'
|
|
'{target} is replaced by the target you specified, i.e. either :feed'
|
|
'#hashtag or the profile name. Defaults to \'{date_utc}_UTC\'')
|
|
g_how.add_argument('--user-agent',
|
|
help='User Agent to use for HTTP requests. Defaults to \'{}\'.'.format(default_user_agent()))
|
|
g_how.add_argument('-S', '--no-sleep', action='store_true', help=SUPPRESS)
|
|
g_how.add_argument('--graphql-rate-limit', type=int, help=SUPPRESS)
|
|
g_how.add_argument('--max-connection-attempts', metavar='N', type=int, default=3,
|
|
help='Maximum number of connection attempts until a request is aborted. Defaults to 3. If a '
|
|
'connection fails, it can be manually skipped by hitting CTRL+C. Set this to 0 to retry '
|
|
'infinitely.')
|
|
g_how.add_argument('--commit-mode', action='store_true',
|
|
help='Tries to ensure downloaded images avoid corruption in case of unexpected interruption. '
|
|
'If the last picture is corrupted, Instaloader will fix the picture the next time it is run. '
|
|
'Requires the JSON metadata to be saved .')
|
|
|
|
g_misc = parser.add_argument_group('Miscellaneous Options')
|
|
g_misc.add_argument('-q', '--quiet', action='store_true',
|
|
help='Disable user interaction, i.e. do not print messages (except errors) and fail '
|
|
'if login credentials are needed but not given. This makes Instaloader suitable as a '
|
|
'cron job.')
|
|
g_misc.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
|
|
g_misc.add_argument('--version', action='version', help='Show version number and exit.',
|
|
version=__version__)
|
|
|
|
args = parser.parse_args()
|
|
try:
|
|
if args.login is None and (args.stories or args.stories_only):
|
|
print("--login=USERNAME required to download stories.", file=sys.stderr)
|
|
args.stories = False
|
|
if args.stories_only:
|
|
raise SystemExit(1)
|
|
|
|
if ':feed-all' in args.profile or ':feed-liked' in args.profile:
|
|
raise SystemExit(":feed-all and :feed-liked were removed. Use :feed as target and "
|
|
"eventually --post-filter=viewer_has_liked.")
|
|
|
|
post_metadata_txt_pattern = '\n'.join(args.post_metadata_txt) if args.post_metadata_txt else None
|
|
storyitem_metadata_txt_pattern = '\n'.join(args.storyitem_metadata_txt) if args.storyitem_metadata_txt else None
|
|
|
|
if args.no_captions:
|
|
if not (post_metadata_txt_pattern or storyitem_metadata_txt_pattern):
|
|
post_metadata_txt_pattern = ''
|
|
storyitem_metadata_txt_pattern = ''
|
|
else:
|
|
raise SystemExit("--no-captions and --post-metadata-txt or --storyitem-metadata-txt given; "
|
|
"That contradicts.")
|
|
|
|
if args.no_pictures and args.fast_update:
|
|
raise SystemExit('--no-pictures and --fast-update cannot be used together.')
|
|
|
|
# Determine what to download
|
|
download_profile_pic = not args.no_profile_pic or args.profile_pic_only
|
|
download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only)
|
|
download_stories = args.stories or args.stories_only
|
|
|
|
if args.commit_mode and args.no_metadata_json:
|
|
raise SystemExit('--commit-mode requires JSON metadata to be saved.')
|
|
|
|
loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent,
|
|
dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern,
|
|
download_pictures=not args.no_pictures,
|
|
download_videos=not args.no_videos, download_video_thumbnails=not args.no_video_thumbnails,
|
|
download_geotags=args.geotags,
|
|
download_comments=args.comments, save_metadata=not args.no_metadata_json,
|
|
compress_json=not args.no_compress_json,
|
|
post_metadata_txt_pattern=post_metadata_txt_pattern,
|
|
storyitem_metadata_txt_pattern=storyitem_metadata_txt_pattern,
|
|
graphql_rate_limit=args.graphql_rate_limit,
|
|
max_connection_attempts=args.max_connection_attempts,
|
|
commit_mode=args.commit_mode)
|
|
_main(loader,
|
|
args.profile,
|
|
username=args.login.lower() if args.login is not None else None,
|
|
password=args.password,
|
|
sessionfile=args.sessionfile,
|
|
download_profile_pic=download_profile_pic,
|
|
download_posts=download_posts,
|
|
download_stories=download_stories,
|
|
download_highlights=args.highlights,
|
|
download_tagged=args.tagged,
|
|
fast_update=args.fast_update,
|
|
max_count=int(args.count) if args.count is not None else None,
|
|
post_filter_str=args.post_filter,
|
|
storyitem_filter_str=args.storyitem_filter)
|
|
loader.close()
|
|
except InstaloaderException as err:
|
|
raise SystemExit("Fatal error: %s" % err)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|