diff --git a/README.rst b/README.rst index 3e03d4c..64a9e82 100644 --- a/README.rst +++ b/README.rst @@ -21,7 +21,7 @@ :: - instaloader [--comments] [--geotags] [--stories] + instaloader [--comments] [--geotags] [--stories] [--highlights] [--tagged] [--login YOUR-USERNAME] [--fast-update] profile | "#hashtag" | :stories | :feed | :saved diff --git a/docs/_static/style.css b/docs/_static/style.css index 80010e1..ee4203f 100644 --- a/docs/_static/style.css +++ b/docs/_static/style.css @@ -18,3 +18,11 @@ a { a:hover { color: #f48400; } + +body { + font-size: 16px; +} + +.versionmodified { + font-size: 14px; +} diff --git a/docs/as-module.rst b/docs/as-module.rst index f912dcc..44a8cce 100644 --- a/docs/as-module.rst +++ b/docs/as-module.rst @@ -135,6 +135,17 @@ User Stories .. autoclass:: StoryItem :no-show-inheritance: +Highlights +"""""""""" + +.. autoclass:: Highlight + :no-show-inheritance: + :inherited-members: + + Bases: :class:`Story` + + .. versionadded:: 4.1 + Profiles """""""" diff --git a/docs/basic-usage.rst b/docs/basic-usage.rst index c9cdaaf..a3ad545 100644 --- a/docs/basic-usage.rst +++ b/docs/basic-usage.rst @@ -60,12 +60,27 @@ already have a valid session cookie file. What to Download ^^^^^^^^^^^^^^^^ +.. targets-start + Instaloader supports the following targets: - ``profile`` - Public profile, or private profile with :option:`--login`. For each profile - you download, :option:`--stories` instructs Instaloader to also - **download the user's stories**. + Public profile, or private profile with :option:`--login`. + + If an already-downloaded profile has been renamed, Instaloader automatically + finds it by its unique ID and renames the folder accordingly. + + Besides the profile's posts, its current profile picture is downloaded. 
For + each profile you download, + + - :option:`--stories` + instructs Instaloader to also **download the user's stories**, + + - :option:`--highlights` + to **download highlights of each profile that is downloaded**, and + + - :option:`--tagged` + to **download posts where the user is tagged**. - ``"#hashtag"`` Posts with a certain **hashtag** (the quotes are usually necessary), @@ -84,6 +99,14 @@ Instaloader supports the following targets: All profiles that are followed by ``profile``, i.e. the *followees* of ``profile`` (requires :option:`--login`). +- ``-post`` + The single **post** with the given shortcode. Must be preceded by ``--`` in + the argument list so that it is not mistaken for an option flag. + + .. versionadded:: 4.1 + +.. targets-end + Instaloader goes through all media matching the specified targets and downloads the pictures and videos and their captions. You can specify diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 2b6e6fc..1b8375b 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -13,27 +13,41 @@ feed), ``:stories`` (stories of your followees) or ``:saved`` (collection of posts marked as saved). Here we explain the additional options that can be given to Instaloader to -customize its behavior. To get a list of all flags, their abbreviations and -their descriptions, you may also run ``instaloader --help``. For an +customize its behavior. For an introduction on how to use Instaloader, see :ref:`download-pictures-from-instagram`. -What to Download -^^^^^^^^^^^^^^^^ +To get a list of all flags, their abbreviations and +their descriptions, you may also run:: -Specify a list of targets (profiles, #hashtags, ``:feed``, ``:stories`` or -``:saved``). For each of these, Instaloader creates a folder and stores all -posts along with the pictures's captions and the current **profile picture** -there. 
If an already-downloaded profile has been renamed, Instaloader -automatically **finds it by its unique ID** and renames the folder likewise. + instaloader --help -.. option:: --profile-pic-only, -P +Targets +^^^^^^^ - Only download profile picture. +Specify a list of targets. For each of these, Instaloader creates a folder and +stores all posts along with the pictures' captions there. -.. option:: --no-profile-pic +.. include:: basic-usage.rst + :start-after: targets-start + :end-before: targets-end - Do not download profile picture. +- ``filename.json[.xz]`` + Re-download the given object. + +- ``+args.txt`` + Read targets (and options) from the given text file. See :option:`+args.txt`. + +What to Download of each Post +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. option:: --no-pictures + + Do not download post pictures. Cannot be used together with + :option:`--fast-update`. Implies :option:`--no-video-thumbnails`, does not + imply :option:`--no-videos`. + + .. versionadded:: 4.1 .. option:: --no-videos, -V @@ -69,11 +83,6 @@ automatically **finds it by its unique ID** and renames the folder likewise. Template to write in txt file for each StoryItem. See :ref:`metadata-text-files`. -.. option:: --stories, -s - - Also **download stories** of each profile that is downloaded. Requires - :option:`--login`. - .. option:: --no-metadata-json Do not create a JSON file containing the metadata of each post. @@ -82,8 +91,50 @@ automatically **finds it by its unique ID** and renames the folder likewise. Do not xz compress JSON files, rather create pretty formatted JSONs. + +What to Download of each Profile +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. option:: --profile-pic-only, -P + + .. deprecated:: 4.1 + Use :option:`--no-posts`. + + Only download profile picture. + +.. option:: --no-posts + + Do not download regular posts. + + .. versionadded:: 4.1 + +.. option:: --no-profile-pic + + Do not download profile picture. + +.. 
option:: --stories, -s + + Also **download stories** of each profile that is downloaded. Requires + :option:`--login`. + +.. option:: --highlights + + Also **download highlights** of each profile that is downloaded. Requires + :option:`--login`. + + .. versionadded:: 4.1 + +.. option:: --tagged + + Also download posts where each profile is tagged. + + .. versionadded:: 4.1 + .. option:: --stories-only + .. deprecated:: 4.1 + Use :option:`--stories` :option:`--no-posts`. + Rather than downloading regular posts of each specified profile, only download stories. Requires :option:`--login`. Does not imply :option:`--no-profile-pic`. @@ -93,6 +144,15 @@ automatically **finds it by its unique ID** and renames the folder likewise. If possible, use ``:stories`` target rather than :option:`--stories-only` with all your followees. ``:stories`` uses fewer API requests. +Which Posts to Download +^^^^^^^^^^^^^^^^^^^^^^^ + +.. option:: --fast-update, -F + + For each target, stop when encountering the first already-downloaded picture. + This flag is recommended when you use Instaloader to update your personal + Instagram archive. + .. option:: --post-filter filter, --only-if filter Expression that, if given, must evaluate to True for each post to be @@ -108,20 +168,6 @@ automatically **finds it by its unique ID** and renames the folder likewise. evaluated to :class:`instaloader.StoryItem` attributes. See :ref:`filter-posts` for more examples. - - -When to Stop Downloading -^^^^^^^^^^^^^^^^^^^^^^^^ - -If none of these options are given, Instaloader goes through all pictures -matching the specified targets. - -.. option:: --fast-update, -F - - For each target, stop when encountering the first already-downloaded picture. - This flag is recommended when you use Instaloader to update your personal - Instagram archive. - .. option:: --count COUNT, -c Do not attempt to download more than COUNT posts. 
Applies only to @@ -192,3 +238,21 @@ Miscellaneous Options Disable user interaction, i.e. do not print messages (except errors) and fail if login credentials are needed but not given. This is handy for running :ref:`instaloader-as-cronjob`. + +.. option:: +args.txt + + Read arguments from the file `args.txt`, a shortcut to provide arguments from + a file rather than on the command line. This provides a convenient way to hide login + info from the CLI, and can also simplify the management of long argument lists. + + .. note:: + + The text file must contain one argument per line; give option values as ``--option=value``. + + args.txt example:: + + --login=MYUSERNAME + --password=MYPASSWORD + --fast-update + + .. versionadded:: 4.1 diff --git a/docs/index.rst b/docs/index.rst index 5f0987f..ce2d0ad 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -41,7 +41,7 @@ See :ref:`install` for more options on how to install Instaloader. :: - instaloader [--comments] [--geotags] [--stories] + instaloader [--comments] [--geotags] [--stories] [--highlights] [--tagged] [--login YOUR-USERNAME] [--fast-update] profile | "#hashtag" | :stories | :feed | :saved diff --git a/instaloader/__init__.py b/instaloader/__init__.py index df41aeb..7ced198 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.0.8' +__version__ = '4.1rc1' try: @@ -15,5 +15,5 @@ else: from .exceptions import * from .instaloader import Instaloader from .instaloadercontext import InstaloaderContext -from .structures import (Post, PostSidecarNode, PostComment, PostLocation, Profile, Story, StoryItem, +from .structures import (Highlight, Post, PostSidecarNode, PostComment, PostLocation, Profile, Story, StoryItem, load_structure_from_file, save_structure_to_file) diff --git a/instaloader/__main__.py b/instaloader/__main__.py index 9f9c54e..ca54e88 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -18,7 +18,7 @@ def 
usage_string(): argv0 = os.path.basename(sys.argv[0]) argv0 = "instaloader" if argv0 == "__main__.py" else argv0 return """ -{0} [--comments] [--geotags] [--stories] +{0} [--comments] [--geotags] [--stories] [--highlights] [--tagged] {2:{1}} [--login YOUR-USERNAME] [--fast-update] {2:{1}} profile | "#hashtag" | :stories | :feed | :saved {0} --help""".format(argv0, len(argv0), '') @@ -58,11 +58,11 @@ def filterstr_to_filterfunc(filter_str: str, item_type: type): def _main(instaloader: Instaloader, targetlist: List[str], username: Optional[str] = None, password: Optional[str] = None, - sessionfile: Optional[str] = None, max_count: Optional[int] = None, - profile_pic: bool = True, profile_pic_only: bool = False, + sessionfile: Optional[str] = None, + download_profile_pic: bool = True, download_posts=True, + download_stories: bool = False, download_highlights: bool = False, download_tagged: bool = False, fast_update: bool = False, - stories: bool = False, stories_only: bool = False, - post_filter_str: Optional[str] = None, + max_count: Optional[int] = None, post_filter_str: Optional[str] = None, storyitem_filter_str: Optional[str] = None) -> None: """Download set of profiles, hashtags etc. and handle logging in and session files if desired.""" # Parse and generate filter function @@ -88,10 +88,6 @@ def _main(instaloader: Instaloader, targetlist: List[str], else: instaloader.interactive_login(username) instaloader.context.log("Logged in as %s." 
% username) - # Determine what to download - download_profile_pic = profile_pic or profile_pic_only - download_profile_posts = not (stories_only or profile_pic_only) - download_profile_stories = stories or stories_only # Try block for KeyboardInterrupt (save session on ^C) profiles = set() anonymous_retry_profiles = set() @@ -132,6 +128,8 @@ def _main(instaloader: Instaloader, targetlist: List[str], elif target[0] == '#': instaloader.download_hashtag(hashtag=target[1:], max_count=max_count, fast_update=fast_update, post_filter=post_filter) + elif target[0] == '-': + instaloader.download_post(Post.from_shortcode(instaloader.context, target[1:]), target) elif target == ":feed": instaloader.download_feed_posts(fast_update=fast_update, max_count=max_count, post_filter=post_filter) @@ -144,7 +142,7 @@ def _main(instaloader: Instaloader, targetlist: List[str], try: profile = instaloader.check_profile_id(target) if instaloader.context.is_logged_in and profile.has_blocked_viewer: - if download_profile_pic or (download_profile_posts and not profile.is_private): + if download_profile_pic or ((download_posts or download_tagged) and not profile.is_private): raise ProfileNotExistsException("{} blocked you; But we download her anonymously." .format(target)) else: @@ -154,37 +152,30 @@ def _main(instaloader: Instaloader, targetlist: List[str], except ProfileNotExistsException as err: # Not only our profile.has_blocked_viewer condition raises ProfileNotExistsException, # check_profile_id() also does, since access to blocked profile may be responded with 404. 
- if instaloader.context.is_logged_in and (download_profile_pic or download_profile_posts): + if instaloader.context.is_logged_in and (download_profile_pic or download_posts or + download_tagged): instaloader.context.log(err) instaloader.context.log("Trying again anonymously, helps in case you are just blocked.") with instaloader.anonymous_copy() as anonymous_loader: with instaloader.context.error_catcher(): anonymous_retry_profiles.add(anonymous_loader.check_profile_id(target)) - instaloader.context.log("Looks good.") + instaloader.context.error("Warning: {} will be downloaded anonymously (\"{}\")." + .format(target, err)) else: raise if len(profiles) > 1: instaloader.context.log("Downloading {} profiles: {}".format(len(profiles), ' '.join([p.username for p in profiles]))) - if download_profile_pic or download_profile_posts: - # Iterate through profiles list and download them - for target in profiles: - with instaloader.context.error_catcher(target): - instaloader.download_profile(target, download_profile_pic, not download_profile_posts, - fast_update, post_filter=post_filter) - if anonymous_retry_profiles: - instaloader.context.log("Downloading anonymously: {}" - .format(' '.join([p.username for p in anonymous_retry_profiles]))) - with instaloader.anonymous_copy() as anonymous_loader: - for target in anonymous_retry_profiles: - with instaloader.context.error_catcher(target): - anonymous_loader.download_profile(target, download_profile_pic, not download_profile_posts, - fast_update, post_filter=post_filter) - if download_profile_stories and profiles: - with instaloader.context.error_catcher("Download stories"): - instaloader.context.log("Downloading stories") - instaloader.download_stories(userids=list(profiles), fast_update=fast_update, - filename_target=None, storyitem_filter=storyitem_filter) + instaloader.download_profiles(profiles, + download_profile_pic, download_posts, download_tagged, download_highlights, + download_stories, fast_update, post_filter, 
storyitem_filter) + if anonymous_retry_profiles: + instaloader.context.log("Downloading anonymously: {}" + .format(' '.join([p.username for p in anonymous_retry_profiles]))) + with instaloader.anonymous_copy() as anonymous_loader: + anonymous_loader.download_profiles(anonymous_retry_profiles, + download_profile_pic, download_posts, download_tagged, + fast_update=fast_update, post_filter=post_filter) except KeyboardInterrupt: print("\nInterrupted by user.", file=sys.stderr) # Save session if it is useful @@ -204,72 +195,95 @@ def main(): parser = ArgumentParser(description=__doc__, add_help=False, usage=usage_string(), epilog="Report issues at https://github.com/instaloader/instaloader/issues. " "The complete documentation can be found at " - "https://instaloader.github.io/.") + "https://instaloader.github.io/.", + fromfile_prefix_chars='+') - g_what = parser.add_argument_group('What to Download', - 'Specify a list of profiles or #hashtags. For each of these, Instaloader ' - 'creates a folder and ' - 'downloads all posts along with the pictures\'s ' - 'captions and the current profile picture. ' - 'If an already-downloaded profile has been renamed, Instaloader automatically ' - 'finds it by its unique ID and renames the folder likewise.') - g_what.add_argument('profile', nargs='*', metavar='profile|#hashtag', - help='Name of profile or #hashtag to download. ' - 'Alternatively, if --login is given: @ to download all followees of ' - '; the special targets ' - ':feed to download pictures from your feed; ' - ':stories to download the stories of your followees; or ' - ':saved to download the posts marked as saved.') - g_what.add_argument('-P', '--profile-pic-only', action='store_true', - help='Only download profile picture.') - g_what.add_argument('--no-profile-pic', action='store_true', + g_targets = parser.add_argument_group("What to Download", + "Specify a list of targets. For each of these, Instaloader creates a folder " + "and downloads all posts. 
The following targets are supported:") + g_targets.add_argument('profile', nargs='*', + help="Download profile. If an already-downloaded profile has been renamed, Instaloader " + "automatically finds it by its unique ID and renames the folder likewise.") + g_targets.add_argument('_at_profile', nargs='*', metavar="@profile", + help="Download all followees of profile. Requires --login. " + "Consider using :feed rather than @yourself.") + g_targets.add_argument('_hashtag', nargs='*', metavar='"#hashtag"', help="Download #hashtag.") + g_targets.add_argument('_feed', nargs='*', metavar=":feed", + help="Download pictures from your feed. Requires --login.") + g_targets.add_argument('_stories', nargs='*', metavar=":stories", + help="Download the stories of your followees. Requires --login.") + g_targets.add_argument('_saved', nargs='*', metavar=":saved", + help="Download the posts that you marked as saved. Requires --login.") + g_targets.add_argument('_singlepost', nargs='*', metavar="-- -shortcode", + help="Download the post with the given shortcode") + g_targets.add_argument('_json', nargs='*', metavar="filename.json[.xz]", + help="Re-Download the given object.") + g_targets.add_argument('_fromfile', nargs='*', metavar="+args.txt", + help="Read targets (and options) from given textfile.") + + g_post = parser.add_argument_group("What to Download of each Post") + + g_prof = parser.add_argument_group("What to Download of each Profile") + + g_prof.add_argument('-P', '--profile-pic-only', action='store_true', + help=SUPPRESS) + g_prof.add_argument('--no-posts', action='store_true', + help="Do not download regular posts.") + g_prof.add_argument('--no-profile-pic', action='store_true', help='Do not download profile picture.') - g_what.add_argument('-V', '--no-videos', action='store_true', + g_post.add_argument('--no-pictures', action='store_true', + help='Do not download post pictures. Cannot be used together with --fast-update. 
' + 'Implies --no-video-thumbnails, does not imply --no-videos.') + g_post.add_argument('-V', '--no-videos', action='store_true', help='Do not download videos.') - g_what.add_argument('--no-video-thumbnails', action='store_true', + g_post.add_argument('--no-video-thumbnails', action='store_true', help='Do not download thumbnails of videos.') - g_what.add_argument('-G', '--geotags', action='store_true', + g_post.add_argument('-G', '--geotags', action='store_true', help='Download geotags when available. Geotags are stored as a ' 'text file with the location\'s name and a Google Maps link. ' 'This requires an additional request to the Instagram ' 'server for each picture, which is why it is disabled by default.') - g_what.add_argument('-C', '--comments', action='store_true', + g_post.add_argument('-C', '--comments', action='store_true', help='Download and update comments for each post. ' 'This requires an additional request to the Instagram ' 'server for each post, which is why it is disabled by default.') - g_what.add_argument('--no-captions', action='store_true', + g_post.add_argument('--no-captions', action='store_true', help='Do not create txt files.') - g_what.add_argument('--post-metadata-txt', action='append', + g_post.add_argument('--post-metadata-txt', action='append', help='Template to write in txt file for each Post.') - g_what.add_argument('--storyitem-metadata-txt', action='append', + g_post.add_argument('--storyitem-metadata-txt', action='append', help='Template to write in txt file for each StoryItem.') - g_what.add_argument('--no-metadata-json', action='store_true', + g_post.add_argument('--no-metadata-json', action='store_true', help='Do not create a JSON file containing the metadata of each post.') - g_what.add_argument('--metadata-json', action='store_true', + g_post.add_argument('--metadata-json', action='store_true', help=SUPPRESS) - g_what.add_argument('--no-compress-json', action='store_true', + g_post.add_argument('--no-compress-json', 
action='store_true', help='Do not xz compress JSON files, rather create pretty formatted JSONs.') - g_what.add_argument('-s', '--stories', action='store_true', + g_prof.add_argument('-s', '--stories', action='store_true', help='Also download stories of each profile that is downloaded. Requires --login.') - g_what.add_argument('--stories-only', action='store_true', - help='Rather than downloading regular posts of each specified profile, only download ' - 'stories. Requires --login. Does not imply --no-profile-pic.') - g_what.add_argument('--post-filter', '--only-if', metavar='filter', + g_prof.add_argument('--stories-only', action='store_true', + help=SUPPRESS) + g_prof.add_argument('--highlights', action='store_true', + help='Also download highlights of each profile that is downloaded. Requires --login.') + g_prof.add_argument('--tagged', action='store_true', + help='Also download posts where each profile is tagged.') + + g_cond = parser.add_argument_group("Which Posts to Download") + + g_cond.add_argument('-F', '--fast-update', action='store_true', + help='For each target, stop when encountering the first already-downloaded picture. This ' + 'flag is recommended when you use Instaloader to update your personal Instagram archive.') + + g_cond.add_argument('--post-filter', '--only-if', metavar='filter', help='Expression that, if given, must evaluate to True for each post to be downloaded. Must be ' 'a syntactically valid python expression. Variables are evaluated to ' 'instaloader.Post attributes. Example: --post-filter=viewer_has_liked.') - g_what.add_argument('--storyitem-filter', metavar='filter', + g_cond.add_argument('--storyitem-filter', metavar='filter', help='Expression that, if given, must evaluate to True for each storyitem to be downloaded. ' 'Must be a syntactically valid python expression. 
Variables are evaluated to ' 'instaloader.StoryItem attributes.') - g_stop = parser.add_argument_group('When to Stop Downloading', - 'If none of these options are given, Instaloader goes through all pictures ' - 'matching the specified targets.') - g_stop.add_argument('-F', '--fast-update', action='store_true', - help='For each target, stop when encountering the first already-downloaded picture. This ' - 'flag is recommended when you use Instaloader to update your personal Instagram archive.') - g_stop.add_argument('-c', '--count', + g_cond.add_argument('-c', '--count', help='Do not attempt to download more than COUNT posts. ' 'Applies only to #hashtag and :feed.') @@ -339,8 +353,17 @@ def main(): raise SystemExit("--no-captions and --post-metadata-txt or --storyitem-metadata-txt given; " "That contradicts.") + if args.no_pictures and args.fast_update: + raise SystemExit('--no-pictures and --fast-update cannot be used together.') + + # Determine what to download + download_profile_pic = not args.no_profile_pic or args.profile_pic_only + download_posts = not (args.no_posts or args.stories_only or args.profile_pic_only) + download_stories = args.stories or args.stories_only + loader = Instaloader(sleep=not args.no_sleep, quiet=args.quiet, user_agent=args.user_agent, dirname_pattern=args.dirname_pattern, filename_pattern=args.filename_pattern, + download_pictures=not args.no_pictures, download_videos=not args.no_videos, download_video_thumbnails=not args.no_video_thumbnails, download_geotags=args.geotags, download_comments=args.comments, save_metadata=not args.no_metadata_json, @@ -354,12 +377,13 @@ def main(): username=args.login.lower() if args.login is not None else None, password=args.password, sessionfile=args.sessionfile, - max_count=int(args.count) if args.count is not None else None, - profile_pic=not args.no_profile_pic, - profile_pic_only=args.profile_pic_only, + download_profile_pic=download_profile_pic, + download_posts=download_posts, + 
download_stories=download_stories, + download_highlights=args.highlights, + download_tagged=args.tagged, fast_update=args.fast_update, - stories=args.stories, - stories_only=args.stories_only, + max_count=int(args.count) if args.count is not None else None, post_filter_str=args.post_filter, storyitem_filter_str=args.storyitem_filter) loader.close() diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index d8e5ec6..f7e8a9e 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -11,11 +11,11 @@ from contextlib import contextmanager, suppress from datetime import datetime, timezone from functools import wraps from io import BytesIO -from typing import Any, Callable, Iterator, List, Optional, Union +from typing import Any, Callable, Iterator, List, Optional, Set, Union from .exceptions import * from .instaloadercontext import InstaloaderContext -from .structures import JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file +from .structures import Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, save_structure_to_file def get_default_session_filename(username: str) -> str: @@ -70,7 +70,10 @@ class _PostPathFormatter(_ArbitraryItemFormatter): def vformat(self, format_string, args, kwargs): """Override :meth:`string.Formatter.vformat` for character substitution in paths for Windows, see issue #84.""" ret = super().vformat(format_string, args, kwargs) - return ret.replace(':', '\ua789') if platform.system() == 'Windows' else ret + if platform.system() == 'Windows': + ret = ret.replace(':', '\ua789').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02') + ret = ret.replace('\\', '\uff3c').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a') + return ret class Instaloader: @@ -80,6 +83,7 @@ class Instaloader: :param user_agent: :option:`--user-agent` :param dirname_pattern: :option:`--dirname-pattern`, default is ``{target}`` :param filename_pattern: 
:option:`--filename-pattern`, default is ``{date_utc}_UTC`` + :param download_pictures: not :option:`--no-pictures` :param download_videos: not :option:`--no-videos` :param download_video_thumbnails: not :option:`--no-video-thumbnails` :param download_geotags: :option:`--geotags` @@ -102,6 +106,7 @@ class Instaloader: user_agent: Optional[str] = None, dirname_pattern: Optional[str] = None, filename_pattern: Optional[str] = None, + download_pictures=True, download_videos: bool = True, download_video_thumbnails: bool = True, download_geotags: bool = True, @@ -118,6 +123,7 @@ class Instaloader: # configuration parameters self.dirname_pattern = dirname_pattern or "{target}" self.filename_pattern = filename_pattern or "{date_utc}_UTC" + self.download_pictures = download_pictures self.download_videos = download_videos self.download_video_thumbnails = download_video_thumbnails self.download_geotags = download_geotags @@ -133,11 +139,15 @@ class Instaloader: def anonymous_copy(self): """Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log.""" new_loader = Instaloader(self.context.sleep, self.context.quiet, self.context.user_agent, self.dirname_pattern, - self.filename_pattern, self.download_videos, self.download_video_thumbnails, - self.download_geotags, self.download_comments, self.save_metadata, - self.compress_json, self.post_metadata_txt_pattern, - self.storyitem_metadata_txt_pattern, self.context.graphql_count_per_slidingwindow, - self.context.max_connection_attempts) + self.filename_pattern, download_pictures=self.download_pictures, + download_videos=self.download_videos, + download_video_thumbnails=self.download_video_thumbnails, + download_geotags=self.download_geotags, download_comments=self.download_comments, + save_metadata=self.save_metadata, compress_json=self.compress_json, + post_metadata_txt_pattern=self.post_metadata_txt_pattern, + storyitem_metadata_txt_pattern=self.storyitem_metadata_txt_pattern, + 
graphql_rate_limit=self.context.graphql_count_per_slidingwindow, + max_connection_attempts=self.context.max_connection_attempts) new_loader.context.query_timestamps = self.context.query_timestamps yield new_loader self.context.error_log.extend(new_loader.context.error_log) @@ -328,6 +338,12 @@ class Instaloader: :raises ConnectionException: If connection to Instagram failed.""" self.context.login(user, passwd) + def format_filename(self, item: Union[Post, StoryItem], target: Optional[str] = None): + """Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter. + + .. versionadded:: 4.1""" + return _PostPathFormatter(item).format(self.filename_pattern, target=target) + def download_post(self, post: Post, target: str) -> bool: """ Download everything associated with one instagram post node, i.e. picture, caption and video. @@ -338,30 +354,31 @@ class Instaloader: """ dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target) - filename = dirname + '/' + _PostPathFormatter(post).format(self.filename_pattern, target=target) + filename = dirname + '/' + self.format_filename(post, target=target) os.makedirs(os.path.dirname(filename), exist_ok=True) # Download the image(s) / video thumbnail and videos within sidecars if desired downloaded = False - if post.typename == 'GraphSidecar': - edge_number = 1 - for sidecar_node in post.get_sidecar_nodes(): - # Download picture or video thumbnail - if not sidecar_node.is_video or self.download_video_thumbnails is True: - downloaded |= self.download_pic(filename=filename, url=sidecar_node.display_url, - mtime=post.date_local, filename_suffix=str(edge_number)) - # Additionally download video if available and desired - if sidecar_node.is_video and self.download_videos is True: - downloaded |= self.download_pic(filename=filename, url=sidecar_node.video_url, - mtime=post.date_local, filename_suffix=str(edge_number)) - edge_number += 1 - elif post.typename == 'GraphImage': - 
downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date_local) - elif post.typename == 'GraphVideo': - if self.download_video_thumbnails is True: + if self.download_pictures: + if post.typename == 'GraphSidecar': + edge_number = 1 + for sidecar_node in post.get_sidecar_nodes(): + # Download picture or video thumbnail + if not sidecar_node.is_video or self.download_video_thumbnails is True: + downloaded |= self.download_pic(filename=filename, url=sidecar_node.display_url, + mtime=post.date_local, filename_suffix=str(edge_number)) + # Additionally download video if available and desired + if sidecar_node.is_video and self.download_videos is True: + downloaded |= self.download_pic(filename=filename, url=sidecar_node.video_url, + mtime=post.date_local, filename_suffix=str(edge_number)) + edge_number += 1 + elif post.typename == 'GraphImage': downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date_local) - else: - self.context.error("Warning: {0} has unknown typename: {1}".format(post, post.typename)) + elif post.typename == 'GraphVideo': + if self.download_video_thumbnails is True: + downloaded = self.download_pic(filename=filename, url=post.url, mtime=post.date_local) + else: + self.context.error("Warning: {0} has unknown typename: {1}".format(post, post.typename)) # Save caption if desired metadata_string = _ArbitraryItemFormatter(post).format(self.post_metadata_txt_pattern).strip() @@ -462,7 +479,7 @@ class Instaloader: date_local = item.date_local dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target) - filename = dirname + '/' + _PostPathFormatter(item).format(self.filename_pattern, target=target) + filename = dirname + '/' + self.format_filename(item, target=target) os.makedirs(os.path.dirname(filename), exist_ok=True) downloaded = False if not item.is_video or self.download_video_thumbnails is True: @@ -480,6 +497,62 @@ class Instaloader: self.context.log() return downloaded + 
@_requires_login + def get_highlights(self, user: Union[int, Profile]) -> Iterator[Highlight]: + """Get all highlights from a user. + To use this, one needs to be logged in. + + .. versionadded:: 4.1 + + :param user: ID or Profile of the user whose highlights should get fetched. + """ + + userid = user if isinstance(user, int) else user.userid + data = self.context.graphql_query("7c16654f22c819fb63d1183034a5162f", + {"user_id": userid, "include_chaining": False, "include_reel": False, + "include_suggested_users": False, "include_logged_out_extras": False, + "include_highlight_reels": True})["data"]["user"]['edge_highlight_reels'] + if data is None: + raise BadResponseException('Bad highlights reel JSON.') + yield from (Highlight(self.context, edge['node'], user if isinstance(user, Profile) else None) + for edge in data['edges']) + + @_requires_login + def download_highlights(self, + user: Union[int, Profile], + fast_update: bool = False, + filename_target: Optional[str] = None, + storyitem_filter: Optional[Callable[[StoryItem], bool]] = None) -> None: + """ + Download available highlights from a user whose ID is given. + To use this, one needs to be logged in. + + .. versionadded:: 4.1 + + :param user: ID or Profile of the user whose highlights should get downloaded. 
+ :param fast_update: If true, abort when first already-downloaded picture is encountered + :param filename_target: Replacement for {target} in dirname_pattern and filename_pattern + or None if profile name and the highlights' titles should be used instead + :param storyitem_filter: function(storyitem), which returns True if given StoryItem should be downloaded + """ + for user_highlight in self.get_highlights(user): + name = user_highlight.owner_username + self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name)) + totalcount = user_highlight.itemcount + count = 1 + for item in user_highlight.get_items(): + if storyitem_filter is not None and not storyitem_filter(item): + self.context.log("<{} skipped>".format(item), flush=True) + continue + self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True) + count += 1 + with self.context.error_catcher('Download highlights \"{}\" from user {}'.format(user_highlight.title, name)): + downloaded = self.download_storyitem(item, filename_target + if filename_target + else '{}/{}'.format(name, user_highlight.title)) + if fast_update and not downloaded: + break + @_requires_login def get_feed_posts(self) -> Iterator[Post]: """Get Posts of the user's feed. @@ -619,6 +692,26 @@ class Instaloader: if fast_update and not downloaded: break + def download_tagged(self, profile: Profile, fast_update: bool = False, + target: Optional[str] = None, + post_filter: Optional[Callable[[Post], bool]] = None) -> None: + """Download all posts where a profile is tagged. + + .. versionadded:: 4.1""" + if target is None: + target = profile.username + '/:tagged' + self.context.log("Retrieving tagged posts for profile {}.".format(profile.username)) + count = 1 + for post in profile.get_tagged_posts(): + self.context.log("[%3i/???] 
" % (count), end="", flush=True) + count += 1 + if post_filter is not None and not post_filter(post): + self.context.log('<{} skipped>'.format(post)) + with self.context.error_catcher('Download tagged {}'.format(profile.username)): + downloaded = self.download_post(post, target) + if fast_update and not downloaded: + break + def _get_id_filename(self, profile_name: str) -> str: if ((format_string_contains_key(self.dirname_pattern, 'profile') or format_string_contains_key(self.dirname_pattern, 'target'))): @@ -630,6 +723,8 @@ class Instaloader: def save_profile_id(self, profile: Profile): """ Store ID of profile locally. + + .. versionadded:: 4.0.6 """ os.makedirs(self.dirname_pattern.format(profile=profile.username, target=profile.username), exist_ok=True) @@ -682,13 +777,102 @@ class Instaloader: return profile raise ProfileNotExistsException("Profile {0} does not exist.".format(profile_name)) + def download_profiles(self, profiles: Set[Profile], + profile_pic: bool = True, posts: bool = True, + tagged: bool = False, highlights: bool = False, stories: bool = False, + fast_update: bool = False, + post_filter: Optional[Callable[[Post], bool]] = None, + storyitem_filter: Optional[Callable[[Post], bool]] = None, + raise_errors: bool = False): + """High-level method to download set of profiles. + + :param profiles: Set of profiles to download. + :param profile_pic: not :option:`--no-profile-pic`. + :param posts: not :option:`--no-posts`. + :param tagged: :option:`--tagged`. + :param highlights: :option:`--highlights`. + :param stories: :option:`--stories`. + :param fast_update: :option:`--fast-update`. + :param post_filter: :option:`--post-filter`. + :param storyitem_filter: :option:`--post-filter`. + :param raise_errors: + Whether :exc:`LoginRequiredException` and :exc:`PrivateProfileNotFollowedException` should be raised or + catched and printed with :meth:`InstaloaderContext.error_catcher`. + + .. 
versionadded:: 4.1""" + + def _error_raiser(_str): + yield + + error_handler = _error_raiser if raise_errors else self.context.error_catcher + + for profile in profiles: + with error_handler(profile.username): + profile_name = profile.username + + # Save metadata as JSON if desired. + if self.save_metadata: + json_filename = '{0}/{1}_{2}'.format(self.dirname_pattern.format(profile=profile_name, + target=profile_name), + profile_name, profile.userid) + self.save_metadata_json(json_filename, profile) + + # Download profile picture + if profile_pic: + with self.context.error_catcher('Download profile picture of {}'.format(profile_name)): + self.download_profilepic(profile) + + # Catch some errors + if profile.is_private: + if not self.context.is_logged_in: + raise LoginRequiredException("--login=USERNAME required.") + if not profile.followed_by_viewer and self.context.username != profile.username: + raise PrivateProfileNotFollowedException("Private but not followed.") + + # Download tagged, if requested + if tagged: + with self.context.error_catcher('Download tagged of {}'.format(profile_name)): + self.download_tagged(profile, fast_update=fast_update, post_filter=post_filter) + + # Download highlights, if requested + if highlights: + with self.context.error_catcher('Download highlights of {}'.format(profile_name)): + self.download_highlights(profile, fast_update=fast_update, storyitem_filter=storyitem_filter) + + # Iterate over pictures and download them + if posts: + self.context.log("Retrieving posts from profile {}.".format(profile_name)) + totalcount = profile.mediacount + count = 1 + for post in profile.get_posts(): + self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True) + count += 1 + if post_filter is not None and not post_filter(post): + self.context.log('') + continue + with self.context.error_catcher("Download {} of {}".format(post, profile_name)): + downloaded = self.download_post(post, target=profile_name) + if fast_update and not 
downloaded: + break + + if stories and profiles: + with self.context.error_catcher("Download stories"): + self.context.log("Downloading stories") + self.download_stories(userids=list(profiles), fast_update=fast_update, filename_target=None, + storyitem_filter=storyitem_filter) + def download_profile(self, profile_name: Union[str, Profile], profile_pic: bool = True, profile_pic_only: bool = False, fast_update: bool = False, download_stories: bool = False, download_stories_only: bool = False, + download_tagged: bool = False, download_tagged_only: bool = False, post_filter: Optional[Callable[[Post], bool]] = None, storyitem_filter: Optional[Callable[[StoryItem], bool]] = None) -> None: - """Download one profile""" + """Download one profile + + .. deprecated:: 4.1 + Use :meth:`Instaloader.download_profiles`. + """ # Get profile main page json # check if profile does exist or name has changed since last download @@ -739,6 +923,13 @@ class Instaloader: if download_stories_only: return + # Download tagged, if requested + if download_tagged or download_tagged_only: + with self.context.error_catcher('Download tagged of {}'.format(profile_name)): + self.download_tagged(profile, fast_update=fast_update, post_filter=post_filter) + if download_tagged_only: + return + # Iterate over pictures and download them self.context.log("Retrieving posts from profile {}.".format(profile_name)) totalcount = profile.mediacount diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index d17ce43..8601960 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -57,7 +57,7 @@ class InstaloaderContext: self.quiet = quiet self.max_connection_attempts = max_connection_attempts self._graphql_page_length = 50 - self.graphql_count_per_slidingwindow = graphql_count_per_slidingwindow or 20 + self.graphql_count_per_slidingwindow = graphql_count_per_slidingwindow or 200 self._root_rhx_gis = None # error log, filled with error() and printed at 
the end of Instaloader.main() @@ -69,6 +69,9 @@ class InstaloaderContext: # Can be set to True for testing, disables supression of InstaloaderContext._error_catcher self.raise_all_errors = False + # Cache profile from id (mapping from id to Profile) + self.profile_id_cache = dict() + @contextmanager def anonymous_copy(self): session = self._session @@ -218,7 +221,7 @@ class InstaloaderContext: def _sleep(self): """Sleep a short time if self.sleep is set. Called before each request to instagram.com.""" if self.sleep: - time.sleep(min(random.expovariate(0.6), 5.0)) + time.sleep(min(random.expovariate(0.7), 5.0)) def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram.com', session: Optional[requests.Session] = None, _attempt=1) -> Dict[str, Any]: diff --git a/instaloader/structures.py b/instaloader/structures.py index 208a67d..5c8a6f2 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -402,6 +402,8 @@ class Profile: :param profile_id: userid :raises: :class:`ProfileNotExistsException`, :class:`ProfileHasNoPicsException` """ + if profile_id in context.profile_id_cache: + return context.profile_id_cache[profile_id] data = context.graphql_query("472f257a40c653c64c666ce877d59d2b", {'id': str(profile_id), 'first': 1}, rhx_gis=context.root_rhx_gis)['data']['user'] @@ -415,7 +417,9 @@ class Profile: raise ProfileHasNoPicsException("Profile with ID {0}: no pics found.".format(str(profile_id))) else: raise LoginRequiredException("Login required to determine username (ID: " + str(profile_id) + ").") - return Post(context, data['edges'][0]['node']).owner_profile + profile = Post(context, data['edges'][0]['node']).owner_profile + context.profile_id_cache[profile_id] = profile + return profile def _asdict(self): json_node = self._node.copy() @@ -574,7 +578,9 @@ class Profile: @property def profile_pic_url(self) -> str: - """Return URL of profile picture""" + """Return URL of profile picture + + .. 
versionadded:: 4.0.3""" try: return self._iphone_struct['hd_profile_pic_url_info']['url'] except (InstaloaderException, KeyError) as err: @@ -614,7 +620,9 @@ class Profile: self._metadata('edge_saved_media'))) def get_tagged_posts(self) -> Iterator[Post]: - """Retrieve all posts where a profile is tagged.""" + """Retrieve all posts where a profile is tagged. + + .. versionadded:: 4.0.7""" self._obtain_metadata() yield from (Post(self._context, node, self if int(node['owner']['id']) == self.userid else None) for node in self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7", @@ -780,7 +788,7 @@ class Story: # story is a Story object for item in story.get_items(): # item is a StoryItem object - L.download_storyitem(item, ':stores') + L.download_storyitem(item, ':stories') This class implements == and is hashable. @@ -803,7 +811,7 @@ class Story: return NotImplemented def __hash__(self) -> int: - return hash(self._unique_id) + return hash(self.unique_id) @property def unique_id(self) -> str: @@ -866,6 +874,83 @@ class Story: yield from (StoryItem(self._context, item, self.owner_profile) for item in reversed(self._node['items'])) +class Highlight(Story): + """ + Structure representing a user's highlight with its associated story items. + + Provides methods for accessing highlight properties, as well as :meth:`Highlight.get_items` to request associated + :class:`StoryItem` nodes. Highlights are returned by :meth:`Instaloader.get_highlights`. + + With a logged-in :class:`Instaloader` instance `L`, you may download all highlights of a :class:`Profile` instance + USER with:: + + for highlight in L.get_highlights(USER): + # highlight is a Highlight object + for item in highlight.get_items(): + # item is a StoryItem object + L.download_storyitem(item, '{}/{}'.format(highlight.owner_username, highlight.title)) + + This class implements == and is hashable. + + :param context: :class:`InstaloaderContext` instance used for additional queries if necessary. 
+ :param node: Dictionary containing the available information of the highlight as returned by Instagram. + :param owner: :class:`Profile` instance representing the owner profile of the highlight. + """ + + def __init__(self, context: InstaloaderContext, node: Dict[str, Any], owner: Optional[Profile] = None): + super().__init__(context, node) + self._owner_profile = owner + self._items = None + + def __repr__(self): + return ''.format(self.owner_username, self.title) + + @property + def unique_id(self) -> int: + """A unique ID identifying this set of highlights.""" + return int(self._node['id']) + + @property + def owner_profile(self) -> Profile: + """:class:`Profile` instance of the highlights' owner.""" + if not self._owner_profile: + self._owner_profile = Profile(self._context, self._node['owner']) + return self._owner_profile + + @property + def title(self) -> str: + """The title of these highlights.""" + return self._node['title'] + + @property + def cover_url(self) -> str: + """URL of the highlights' cover.""" + return self._node['cover_media']['thumbnail_src'] + + @property + def cover_cropped_url(self) -> str: + """URL of the cropped version of the cover.""" + return self._node['cover_media_cropped_thumbnail']['url'] + + def _fetch_items(self): + if not self._items: + self._items = self._context.graphql_query("45246d3fe16ccc6577e0bd297a5db1ab", + {"reel_ids": [], "tag_names": [], "location_ids": [], + "highlight_reel_ids": [str(self.unique_id)], + "precomposed_overlay": False})['data']['reels_media'][0]['items'] + + @property + def itemcount(self) -> int: + """Count of items associated with the :class:`Highlight` instance.""" + self._fetch_items() + return len(self._items) + + def get_items(self) -> Iterator[StoryItem]: + """Retrieve all associated highlight items.""" + self._fetch_items() + yield from (StoryItem(self._context, item, self.owner_profile) for item in self._items) + + JsonExportable = Union[Post, Profile, StoryItem] diff --git 
a/test/instaloader_unittests.py b/test/instaloader_unittests.py index 5f2a0fb..aece971 100644 --- a/test/instaloader_unittests.py +++ b/test/instaloader_unittests.py @@ -8,6 +8,7 @@ from itertools import islice import instaloader +PROFILE_WITH_HIGHLIGHTS = 325732271 PUBLIC_PROFILE = "selenagomez" PUBLIC_PROFILE_ID = 460563723 HASHTAG = "kitten" @@ -102,6 +103,14 @@ class TestInstaloaderLoggedIn(TestInstaloaderAnonymously): for item in user_story.get_items(): print(item) + def test_highlights_paging(self): + for user_highlight in self.L.get_highlights(PROFILE_WITH_HIGHLIGHTS): + print("Retrieving {} highlights \"{}\" from profile {}".format(user_highlight.itemcount, + user_highlight.title, + user_highlight.owner_username)) + for item in user_highlight.get_items(): + print(item) + def test_private_profile_paging(self): self.post_paging_test(instaloader.Profile.from_username(self.L.context, PRIVATE_PROFILE).get_posts())