Filter posts with --only-if=FILTER

where FILTER is a boolean expression in Python syntax in which all names
are evaluated as instaloader.Post properties.

Examples:

instaloader --login=USER --only-if='viewer_has_liked' :feed

instaloader --only-if='likes>1000 and comments>5' profile
This commit is contained in:
Alexander Graf 2017-08-19 16:14:18 +02:00
parent 09d2592635
commit ee9993d7c2
2 changed files with 112 additions and 35 deletions

View File

@ -97,17 +97,26 @@ If you want to **download all followees of a given profile**, call
instaloader --login=your_username @profile instaloader --login=your_username @profile
To **download all the pictures from your feed which you have liked**, call To **download all pictures from your feed**:
:: ::
instaloader --login=your_username :feed-liked instaloader --login=your_username :feed
or to **download all pictures from your feed**:
or to **download all the pictures from your feed that you have liked**, call
:: ::
instaloader --login=your_username :feed-all instaloader --login=your_username --only-if=viewer_has_liked :feed
The ``--only-if`` option allows you to **filter media by custom criteria**. For
example, you might only want to download posts that you either liked or that were
liked and commented on by many others:
::
instaloader --login=your_username --only-if="viewer_has_liked or (likes>1500 and comments>10)" profile
**Download all stories** from the profiles you follow: **Download all stories** from the profiles you follow:
@ -134,8 +143,7 @@ has been renamed, Instaloader automatically **finds it by its unique ID** and
renames the folder likewise. renames the folder likewise.
Instead of a *profile* or a *#hashtag*, the special targets Instead of a *profile* or a *#hashtag*, the special targets
``:feed-all`` (pictures from your feed), ``:feed`` (pictures from your feed) and
``:feed-liked`` (pictures from your feed which you liked), and
``:stories`` (stories of your followees) can be specified. ``:stories`` (stories of your followees) can be specified.
--profile-pic-only Only download profile picture. --profile-pic-only Only download profile picture.
@ -157,6 +165,12 @@ Instead of a *profile* or a *#hashtag*, the special targets
--stories-only Rather than downloading regular posts of each --stories-only Rather than downloading regular posts of each
specified profile, only download stories. specified profile, only download stories.
Requires ``--login``. Requires ``--login``.
--only-if filter Expression that, if given, must evaluate to True for each post to
be downloaded. Must be a syntactically valid Python
expression. Variables are evaluated to
``instaloader.Post`` attributes.
Example: ``--only-if=viewer_has_liked``.
When to Stop Downloading When to Stop Downloading
^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -1,7 +1,8 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Download pictures (or videos) along with their captions and other metadata from Instagram.""" """Download pictures (or videos) along with their captions and other metadata from Instagram."""
import ast
import copy
import getpass import getpass
import json import json
import os import os
@ -123,6 +124,42 @@ def format_string_contains_key(format_string: str, key: str) -> bool:
return False return False
def filterstr_to_filterfunc(filter_str: str, logged_in: bool) -> Callable[['Post'], bool]:
    """Take an --only-if=... filter specification and make a filter_func Callable out of it.

    The filter is parsed, verified and compiled exactly once, when this function is called.
    The returned callable looks up every name used in the filter as an attribute of the
    given post and evaluates the precompiled expression with those values.

    :param filter_str: Expression in Python syntax whose names refer to attributes of Post.
    :param logged_in: Whether a login is available. If not, names listed in
       Post.LOGIN_REQUIRING_PROPERTIES are rejected.
    :return: Callable that takes a Post and returns the truth value of the filter for it.
    :raises InvalidArgumentException: if a name in the filter is assigned to, is not an
       attribute of Post, or requires being logged in while logged_in is False.

    A syntactically invalid filter_str is not converted; whatever ast.parse() raises for it
    (normally SyntaxError) propagates to the caller.
    """
    filter_filename = '<--only-if parameter>'
    # Distinct names used by the filter, in order of first occurrence.
    used_names = []

    class VerifyFilter(ast.NodeVisitor):
        """Validate every Name node of the filter and remember which names it uses."""

        def visit_Name(self, node: ast.Name):
            # pylint:disable=invalid-name
            if not isinstance(node.ctx, ast.Load):
                raise InvalidArgumentException("Invalid filter: Modifying variables ({}) not allowed.".format(node.id))
            if not hasattr(Post, node.id):
                raise InvalidArgumentException("Invalid filter: Name {} is not defined.".format(node.id))
            if node.id in Post.LOGIN_REQUIRING_PROPERTIES and not logged_in:
                raise InvalidArgumentException("Invalid filter: Name {} requires being logged in.".format(node.id))
            if node.id not in used_names:
                used_names.append(node.id)
            return self.generic_visit(node)

    filter_ast = ast.parse(filter_str, filename=filter_filename, mode='eval')
    VerifyFilter().visit(filter_ast)
    # The expression never changes, so compile it once instead of once per post.
    filter_code = compile(filter_ast, filter_filename, 'eval')

    def filterfunc(post: 'Post') -> bool:
        """Evaluate the filter against one post."""
        # All referenced attributes are read up front (even those in a branch that ends up
        # short-circuited). Passing the values through a namespace instead of embedding them
        # as AST constants keeps this working for any value type, not only str/int/bool/None.
        namespace = {name: getattr(post, name) for name in used_names}
        # NOTE(review): eval() runs an expression taken from the command line. Only Name
        # nodes are verified above; attribute access or calls on the values are not
        # restricted, so this must not be treated as a sandbox for untrusted input.
        # pylint:disable=eval-used
        return bool(eval(filter_code, namespace))

    return filterfunc
class Post: class Post:
""" """
Structure containing information about an Instagram post. Structure containing information about an Instagram post.
@ -132,6 +169,8 @@ class Post:
This class unifies access to the properties associated with a post. It implements == and is hashable. This class unifies access to the properties associated with a post. It implements == and is hashable.
""" """
LOGIN_REQUIRING_PROPERTIES = ["viewer_has_liked"]
def __init__(self, instaloader: 'Instaloader', node: Dict[str, Any], profile: Optional[str] = None): def __init__(self, instaloader: 'Instaloader', node: Dict[str, Any], profile: Optional[str] = None):
"""Create a Post instance from a node structure as returned by Instagram. """Create a Post instance from a node structure as returned by Instagram.
@ -182,15 +221,19 @@ class Post:
self._full_metadata_dict = pic_json["media"] self._full_metadata_dict = pic_json["media"]
return self._full_metadata_dict return self._full_metadata_dict
def __getitem__(self, item):
"""Implements self[item]. This must not be used from outside of Post. Use the properties instead."""
if item in self._node:
return self._node[item]
return self._full_metadata[item]
@property @property
def owner_username(self) -> str: def owner_username(self) -> str:
"""The Post's lowercase owner name, or 'UNKNOWN'.""" """The Post's lowercase owner name, or 'UNKNOWN'."""
try: try:
if self._profile: if self._profile:
return self._profile.lower() return self._profile.lower()
if 'owner' in self._node and 'username' in self._node['owner']: return self['owner']['username'].lower()
return self._node['owner']['username'].lower()
return self._full_metadata['owner']['username'].lower()
except (InstaloaderException, KeyError, TypeError) as err: except (InstaloaderException, KeyError, TypeError) as err:
self._instaloader.error("Get owner name of {}: {} -- using \'UNKNOWN\'.".format(self, err)) self._instaloader.error("Get owner name of {}: {} -- using \'UNKNOWN\'.".format(self, err))
return 'UNKNOWN' return 'UNKNOWN'
@ -213,7 +256,7 @@ class Post:
@property @property
def sidecar_edges(self) -> List[Dict[str, Any]]: def sidecar_edges(self) -> List[Dict[str, Any]]:
return self._full_metadata['edge_sidecar_to_children']['edges'] return self['edge_sidecar_to_children']['edges']
@property @property
def caption(self) -> Optional[str]: def caption(self) -> Optional[str]:
@ -228,7 +271,7 @@ class Post:
@property @property
def video_url(self) -> str: def video_url(self) -> str:
return self._full_metadata['video_url'] return self['video_url']
@property @property
def viewer_has_liked(self) -> bool: def viewer_has_liked(self) -> bool:
@ -237,15 +280,24 @@ class Post:
:raises LoginRequiredException: if not logged in.""" :raises LoginRequiredException: if not logged in."""
if not self._instaloader.is_logged_in: if not self._instaloader.is_logged_in:
raise LoginRequiredException("Login required to obtain whether viewer has liked {}.".format(self)) raise LoginRequiredException("Login required to obtain whether viewer has liked {}.".format(self))
if 'likes' in self._node: if 'likes' in self._node and 'viewer_has_liked' in self._node['likes']:
return self._node['likes']['viewer_has_liked'] return self._node['likes']['viewer_has_liked']
if 'viewer_has_liked' in self._node: return self['viewer_has_liked']
return self._node['viewer_has_liked']
return self._full_metadata['viewer_has_liked'] @property
def likes(self) -> int:
"""Likes count"""
return self['edge_media_preview_like']['count']
@property
def comments(self) -> int:
"""Comment count"""
return self['edge_media_to_comment']['count']
def get_comments(self) -> Iterator[Dict[str, Any]]: def get_comments(self) -> Iterator[Dict[str, Any]]:
comments_in_metadata = self._full_metadata['edge_media_to_comment'] """Iterate over all comments of the post."""
if comments_in_metadata['count'] == len(comments_in_metadata['edges']): comments_in_metadata = self['edge_media_to_comment']
if self.comments == len(comments_in_metadata['edges']):
# If the Post's metadata already contains all comments, don't do GraphQL requests to obtain them # If the Post's metadata already contains all comments, don't do GraphQL requests to obtain them
yield from (comment['node'] for comment in comments_in_metadata['edges']) yield from (comment['node'] for comment in comments_in_metadata['edges'])
yield from self._instaloader.graphql_node_list(17852405266163336, {'shortcode': self.shortcode}, yield from self._instaloader.graphql_node_list(17852405266163336, {'shortcode': self.shortcode},
@ -254,7 +306,7 @@ class Post:
def get_location(self) -> Optional[Dict[str, str]]: def get_location(self) -> Optional[Dict[str, str]]:
"""If the Post has a location, returns a dictionary with fields 'lat' and 'lng'.""" """If the Post has a location, returns a dictionary with fields 'lat' and 'lng'."""
loc_dict = self._full_metadata["location"] loc_dict = self["location"]
if loc_dict is not None: if loc_dict is not None:
location_json = self._instaloader.get_json("explore/locations/{0}/".format(loc_dict["id"]), location_json = self._instaloader.get_json("explore/locations/{0}/".format(loc_dict["id"]),
params={'__a': 1}) params={'__a': 1})
@ -1110,8 +1162,15 @@ class Instaloader:
def main(self, profilelist: List[str], username: Optional[str] = None, password: Optional[str] = None, def main(self, profilelist: List[str], username: Optional[str] = None, password: Optional[str] = None,
sessionfile: Optional[str] = None, max_count: Optional[int] = None, sessionfile: Optional[str] = None, max_count: Optional[int] = None,
profile_pic_only: bool = False, fast_update: bool = False, profile_pic_only: bool = False, fast_update: bool = False,
stories: bool = False, stories_only: bool = False) -> None: stories: bool = False, stories_only: bool = False,
filter_str: Optional[str] = None) -> None:
"""Download set of profiles, hashtags etc. and handle logging in and session files if desired.""" """Download set of profiles, hashtags etc. and handle logging in and session files if desired."""
# Parse and generate filter function
if filter_str is not None:
filter_func = filterstr_to_filterfunc(filter_str, username is not None)
self._log('Only download posts with property "{}".'.format(filter_str))
else:
filter_func = None
# Login, if desired # Login, if desired
if username is not None: if username is not None:
try: try:
@ -1134,7 +1193,8 @@ class Instaloader:
if pentry[0] == '#': if pentry[0] == '#':
self._log("Retrieving pictures with hashtag {0}".format(pentry)) self._log("Retrieving pictures with hashtag {0}".format(pentry))
with self._error_catcher(): with self._error_catcher():
self.download_hashtag(hashtag=pentry[1:], max_count=max_count, fast_update=fast_update) self.download_hashtag(hashtag=pentry[1:], max_count=max_count, fast_update=fast_update,
filter_func=filter_func)
elif pentry[0] == '@': elif pentry[0] == '@':
if username is not None: if username is not None:
self._log("Retrieving followees of %s..." % pentry[1:]) self._log("Retrieving followees of %s..." % pentry[1:])
@ -1143,19 +1203,12 @@ class Instaloader:
targets.update([followee['username'] for followee in followees]) targets.update([followee['username'] for followee in followees])
else: else:
print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr) print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
elif pentry == ":feed-all": elif pentry == ":feed":
if username is not None: if username is not None:
self._log("Retrieving pictures from your feed...") self._log("Retrieving pictures from your feed...")
with self._error_catcher():
self.download_feed_posts(fast_update=fast_update, max_count=max_count)
else:
print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
elif pentry == ":feed-liked":
if username is not None:
self._log("Retrieving pictures you liked from your feed...")
with self._error_catcher(): with self._error_catcher():
self.download_feed_posts(fast_update=fast_update, max_count=max_count, self.download_feed_posts(fast_update=fast_update, max_count=max_count,
filter_func=lambda post: post.viewer_has_liked) filter_func=filter_func)
else: else:
print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr) print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
elif pentry == ":stories": elif pentry == ":stories":
@ -1167,19 +1220,21 @@ class Instaloader:
else: else:
targets.add(pentry) targets.add(pentry)
if len(targets) > 1: if len(targets) > 1:
self._log("Downloading %i profiles..." % len(targets)) self._log("Downloading {} profiles: {}".format(len(targets), ','.join(targets)))
# Iterate through targets list and download them # Iterate through targets list and download them
for target in targets: for target in targets:
with self._error_catcher(): with self._error_catcher():
try: try:
self.download_profile(target, profile_pic_only, fast_update, stories, stories_only) self.download_profile(target, profile_pic_only, fast_update, stories, stories_only,
filter_func=filter_func)
except ProfileNotExistsException as err: except ProfileNotExistsException as err:
if username is not None: if username is not None:
self._log(err) self._log(err)
self._log("Trying again anonymously, helps in case you are just blocked.") self._log("Trying again anonymously, helps in case you are just blocked.")
with self.anonymous_copy() as anonymous_loader: with self.anonymous_copy() as anonymous_loader:
with self._error_catcher(): with self._error_catcher():
anonymous_loader.download_profile(target, profile_pic_only, fast_update) anonymous_loader.download_profile(target, profile_pic_only, fast_update,
filter_func=filter_func)
else: else:
raise err raise err
except KeyboardInterrupt: except KeyboardInterrupt:
@ -1207,7 +1262,7 @@ def main():
g_what.add_argument('profile', nargs='*', metavar='profile|#hashtag', g_what.add_argument('profile', nargs='*', metavar='profile|#hashtag',
help='Name of profile or #hashtag to download. ' help='Name of profile or #hashtag to download. '
'Alternatively, if --login is given: @<profile> to download all followees of ' 'Alternatively, if --login is given: @<profile> to download all followees of '
'<profile>; the special targets :feed-all or :feed-liked to ' '<profile>; the special targets :feed to '
'download pictures from your feed; or :stories to download the stories of your ' 'download pictures from your feed; or :stories to download the stories of your '
'followees.') 'followees.')
g_what.add_argument('-P', '--profile-pic-only', action='store_true', g_what.add_argument('-P', '--profile-pic-only', action='store_true',
@ -1232,6 +1287,10 @@ def main():
g_what.add_argument('--stories-only', action='store_true', g_what.add_argument('--stories-only', action='store_true',
help='Rather than downloading regular posts of each specified profile, only download ' help='Rather than downloading regular posts of each specified profile, only download '
'stories. Requires --login.') 'stories. Requires --login.')
g_what.add_argument('--only-if', metavar='filter',
help='Expression that, if given, must evaluate to True for each post to be downloaded. Must be '
'a syntactically valid python expression. Variables are evaluated to '
'instaloader.Post attributes. Example: --only-if=viewer_has_liked.')
g_stop = parser.add_argument_group('When to Stop Downloading', g_stop = parser.add_argument_group('When to Stop Downloading',
'If none of these options are given, Instaloader goes through all pictures ' 'If none of these options are given, Instaloader goes through all pictures '
@ -1294,6 +1353,10 @@ def main():
if args.stories_only: if args.stories_only:
raise SystemExit(1) raise SystemExit(1)
if ':feed-all' in args.profile or ':feed-liked' in args.profile:
raise SystemExit(":feed-all and :feed-liked were removed. Use :feed as target and "
"eventually --only-if=viewer_has_liked.")
download_videos = Tristate.always if not args.skip_videos else Tristate.no_extra_query download_videos = Tristate.always if not args.skip_videos else Tristate.no_extra_query
download_comments = Tristate.always if args.comments else Tristate.no_extra_query download_comments = Tristate.always if args.comments else Tristate.no_extra_query
download_captions = Tristate.no_extra_query if not args.no_captions else Tristate.never download_captions = Tristate.no_extra_query if not args.no_captions else Tristate.never
@ -1315,7 +1378,7 @@ def main():
loader.main(args.profile, args.login.lower() if args.login is not None else None, args.password, loader.main(args.profile, args.login.lower() if args.login is not None else None, args.password,
args.sessionfile, args.sessionfile,
int(args.count) if args.count is not None else None, int(args.count) if args.count is not None else None,
args.profile_pic_only, args.fast_update, args.stories, args.stories_only) args.profile_pic_only, args.fast_update, args.stories, args.stories_only, args.only_if)
except InstaloaderException as err: except InstaloaderException as err:
raise SystemExit("Fatal error: %s" % err) raise SystemExit("Fatal error: %s" % err)