Have --storyitem-filter (--only-if for StoryItems)

This commit is contained in:
Alexander Graf 2018-04-19 15:12:05 +02:00
parent 7688bdce45
commit 7d2830b8b2
4 changed files with 74 additions and 45 deletions

View File

@ -83,14 +83,22 @@ automatically **finds it by its unique ID** and renames the folder likewise.
download stories. Requires :option:`--login`. Does not imply download stories. Requires :option:`--login`. Does not imply
:option:`--no-profile-pic`. :option:`--no-profile-pic`.
.. option:: --only-if filter .. option:: --post-filter filter, --only-if filter
Expression that, if given, must evaluate to True for each post to be Expression that, if given, must evaluate to True for each post to be
downloaded. Must be a syntactically valid Python expression. Variables are downloaded. Must be a syntactically valid Python expression. Variables are
evaluated to :class:`instaloader.Post` attributes. Example: evaluated to :class:`instaloader.Post` attributes. Example:
``--only-if=viewer_has_liked``. See :ref:`filter-posts` for more ``--post-filter=viewer_has_liked``. See :ref:`filter-posts` for more
examples. examples.
.. option:: --storyitem-filter filter
Expression that, if given, must evaluate to True for each story item to be
downloaded. Must be a syntactically valid Python expression. Variables are
evaluated to :class:`instaloader.StoryItem` attributes.
See :ref:`filter-posts` for examples of filter expressions.
When to Stop Downloading When to Stop Downloading
^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -4,7 +4,7 @@ import ast
import os import os
import sys import sys
from argparse import ArgumentParser, SUPPRESS from argparse import ArgumentParser, SUPPRESS
from typing import Callable, List, Optional from typing import List, Optional
from . import (Instaloader, InstaloaderException, InvalidArgumentException, Post, Profile, ProfileNotExistsException, from . import (Instaloader, InstaloaderException, InvalidArgumentException, Post, Profile, ProfileNotExistsException,
StoryItem, __version__, load_structure_from_file) StoryItem, __version__, load_structure_from_file)
@ -23,8 +23,9 @@ def usage_string():
{0} --help""".format(argv0, len(argv0), '') {0} --help""".format(argv0, len(argv0), '')
def filterstr_to_filterfunc(filter_str: str, logged_in: bool) -> Callable[['Post'], bool]: def filterstr_to_filterfunc(filter_str: str, item_type: type):
"""Takes an --only-if=... filter specification and makes a filter_func Callable out of it.""" """Takes an --post-filter=... or --storyitem-filter=... filter
specification and makes a filter_func Callable out of it."""
# The filter_str is parsed, then all names occurring in its AST are replaced by loads to post.<name>. A # The filter_str is parsed, then all names occurring in its AST are replaced by loads to item.<name>. A
# function Post->bool is returned which evaluates the filter with the post as 'post' in its namespace. # function item_type->bool is returned which evaluates the filter with the item as 'item' in its namespace.
@ -34,21 +35,20 @@ def filterstr_to_filterfunc(filter_str: str, logged_in: bool) -> Callable[['Post
# pylint:disable=no-self-use # pylint:disable=no-self-use
if not isinstance(node.ctx, ast.Load): if not isinstance(node.ctx, ast.Load):
raise InvalidArgumentException("Invalid filter: Modifying variables ({}) not allowed.".format(node.id)) raise InvalidArgumentException("Invalid filter: Modifying variables ({}) not allowed.".format(node.id))
if not hasattr(Post, node.id): if not hasattr(item_type, node.id):
raise InvalidArgumentException("Invalid filter: Name {} is not defined.".format(node.id)) raise InvalidArgumentException("Invalid filter: {} not a {} attribute.".format(node.id,
if node.id in Post.LOGIN_REQUIRING_PROPERTIES and not logged_in: item_type.__name__))
raise InvalidArgumentException("Invalid filter: Name {} requires being logged in.".format(node.id)) new_node = ast.Attribute(ast.copy_location(ast.Name('item', ast.Load()), node), node.id,
new_node = ast.Attribute(ast.copy_location(ast.Name('post', ast.Load()), node), node.id,
ast.copy_location(ast.Load(), node)) ast.copy_location(ast.Load(), node))
return ast.copy_location(new_node, node) return ast.copy_location(new_node, node)
input_filename = '<--only-if parameter>' input_filename = '<command line filter parameter>'
compiled_filter = compile(TransformFilterAst().visit(ast.parse(filter_str, filename=input_filename, mode='eval')), compiled_filter = compile(TransformFilterAst().visit(ast.parse(filter_str, filename=input_filename, mode='eval')),
filename=input_filename, mode='eval') filename=input_filename, mode='eval')
def filterfunc(post: 'Post') -> bool: def filterfunc(item) -> bool:
# pylint:disable=eval-used # pylint:disable=eval-used
return bool(eval(compiled_filter, {'post': post})) return bool(eval(compiled_filter, {'item': item}))
return filterfunc return filterfunc
@ -59,14 +59,18 @@ def _main(instaloader: Instaloader, targetlist: List[str],
profile_pic: bool = True, profile_pic_only: bool = False, profile_pic: bool = True, profile_pic_only: bool = False,
fast_update: bool = False, fast_update: bool = False,
stories: bool = False, stories_only: bool = False, stories: bool = False, stories_only: bool = False,
filter_str: Optional[str] = None) -> None: post_filter_str: Optional[str] = None,
storyitem_filter_str: Optional[str] = None) -> None:
"""Download set of profiles, hashtags etc. and handle logging in and session files if desired.""" """Download set of profiles, hashtags etc. and handle logging in and session files if desired."""
# Parse and generate filter function # Parse and generate filter function
if filter_str is not None: post_filter = None
filter_func = filterstr_to_filterfunc(filter_str, username is not None) if post_filter_str is not None:
instaloader.context.log('Only download posts with property "{}".'.format(filter_str)) post_filter = filterstr_to_filterfunc(post_filter_str, Post)
else: instaloader.context.log('Only download posts with property "{}".'.format(post_filter_str))
filter_func = None storyitem_filter = None
if storyitem_filter_str is not None:
storyitem_filter = filterstr_to_filterfunc(storyitem_filter_str, StoryItem)
instaloader.context.log('Only download storyitems with property "{}".'.format(storyitem_filter_str))
# Login, if desired # Login, if desired
if username is not None: if username is not None:
try: try:
@ -90,9 +94,15 @@ def _main(instaloader: Instaloader, targetlist: List[str],
with instaloader.context.error_catcher(target): with instaloader.context.error_catcher(target):
structure = load_structure_from_file(instaloader.context, target) structure = load_structure_from_file(instaloader.context, target)
if isinstance(structure, Post): if isinstance(structure, Post):
if post_filter is not None and not post_filter(structure):
instaloader.context.log("<{} ({}) skipped>".format(structure, target), flush=True)
continue
instaloader.context.log("Downloading {} ({})".format(structure, target)) instaloader.context.log("Downloading {} ({})".format(structure, target))
instaloader.download_post(structure, os.path.dirname(target)) instaloader.download_post(structure, os.path.dirname(target))
elif isinstance(structure, StoryItem): elif isinstance(structure, StoryItem):
if storyitem_filter is not None and not storyitem_filter(structure):
instaloader.context.log("<{} ({}) skipped>".format(structure, target), flush=True)
continue
instaloader.context.log("Attempting to download {} ({})".format(structure, target)) instaloader.context.log("Attempting to download {} ({})".format(structure, target))
instaloader.download_storyitem(structure, os.path.dirname(target)) instaloader.download_storyitem(structure, os.path.dirname(target))
elif isinstance(structure, Profile): elif isinstance(structure, Profile):
@ -112,15 +122,15 @@ def _main(instaloader: Instaloader, targetlist: List[str],
profiles.update([followee.username for followee in followees]) profiles.update([followee.username for followee in followees])
elif target[0] == '#': elif target[0] == '#':
instaloader.download_hashtag(hashtag=target[1:], max_count=max_count, fast_update=fast_update, instaloader.download_hashtag(hashtag=target[1:], max_count=max_count, fast_update=fast_update,
filter_func=filter_func) post_filter=post_filter)
elif target == ":feed": elif target == ":feed":
instaloader.download_feed_posts(fast_update=fast_update, max_count=max_count, instaloader.download_feed_posts(fast_update=fast_update, max_count=max_count,
filter_func=filter_func) post_filter=post_filter)
elif target == ":stories": elif target == ":stories":
instaloader.download_stories(fast_update=fast_update) instaloader.download_stories(fast_update=fast_update, storyitem_filter=storyitem_filter)
elif target == ":saved": elif target == ":saved":
instaloader.download_saved_posts(fast_update=fast_update, max_count=max_count, instaloader.download_saved_posts(fast_update=fast_update, max_count=max_count,
filter_func=filter_func) post_filter=post_filter)
else: else:
profiles.add(target) profiles.add(target)
if len(profiles) > 1: if len(profiles) > 1:
@ -130,7 +140,8 @@ def _main(instaloader: Instaloader, targetlist: List[str],
with instaloader.context.error_catcher(target): with instaloader.context.error_catcher(target):
try: try:
instaloader.download_profile(target, profile_pic, profile_pic_only, fast_update, instaloader.download_profile(target, profile_pic, profile_pic_only, fast_update,
stories, stories_only, filter_func=filter_func) stories, stories_only, post_filter=post_filter,
storyitem_filter=storyitem_filter)
except ProfileNotExistsException as err: except ProfileNotExistsException as err:
if not instaloader.context.is_logged_in: if not instaloader.context.is_logged_in:
instaloader.context.log(err) instaloader.context.log(err)
@ -138,7 +149,7 @@ def _main(instaloader: Instaloader, targetlist: List[str],
with instaloader.anonymous_copy() as anonymous_loader: with instaloader.anonymous_copy() as anonymous_loader:
with instaloader.context.error_catcher(): with instaloader.context.error_catcher():
anonymous_loader.download_profile(target, profile_pic, profile_pic_only, anonymous_loader.download_profile(target, profile_pic, profile_pic_only,
fast_update, filter_func=filter_func) fast_update, post_filter=post_filter)
else: else:
raise raise
except KeyboardInterrupt: except KeyboardInterrupt:
@ -210,10 +221,14 @@ def main():
g_what.add_argument('--stories-only', action='store_true', g_what.add_argument('--stories-only', action='store_true',
help='Rather than downloading regular posts of each specified profile, only download ' help='Rather than downloading regular posts of each specified profile, only download '
'stories. Requires --login. Does not imply --no-profile-pic.') 'stories. Requires --login. Does not imply --no-profile-pic.')
g_what.add_argument('--only-if', metavar='filter', g_what.add_argument('--post-filter', '--only-if', metavar='filter',
help='Expression that, if given, must evaluate to True for each post to be downloaded. Must be ' help='Expression that, if given, must evaluate to True for each post to be downloaded. Must be '
'a syntactically valid python expression. Variables are evaluated to ' 'a syntactically valid python expression. Variables are evaluated to '
'instaloader.Post attributes. Example: --only-if=viewer_has_liked.') 'instaloader.Post attributes. Example: --post-filter=viewer_has_liked.')
g_what.add_argument('--storyitem-filter', metavar='filter',
help='Expression that, if given, must evaluate to True for each storyitem to be downloaded. '
'Must be a syntactically valid python expression. Variables are evaluated to '
'instaloader.StoryItem attributes.')
g_stop = parser.add_argument_group('When to Stop Downloading', g_stop = parser.add_argument_group('When to Stop Downloading',
'If none of these options are given, Instaloader goes through all pictures ' 'If none of these options are given, Instaloader goes through all pictures '
@ -279,7 +294,7 @@ def main():
if ':feed-all' in args.profile or ':feed-liked' in args.profile: if ':feed-all' in args.profile or ':feed-liked' in args.profile:
raise SystemExit(":feed-all and :feed-liked were removed. Use :feed as target and " raise SystemExit(":feed-all and :feed-liked were removed. Use :feed as target and "
"eventually --only-if=viewer_has_liked.") "eventually --post-filter=viewer_has_liked.")
post_metadata_txt_pattern = '\n'.join(args.post_metadata_txt) if args.post_metadata_txt else None post_metadata_txt_pattern = '\n'.join(args.post_metadata_txt) if args.post_metadata_txt else None
storyitem_metadata_txt_pattern = '\n'.join(args.storyitem_metadata_txt) if args.storyitem_metadata_txt else None storyitem_metadata_txt_pattern = '\n'.join(args.storyitem_metadata_txt) if args.storyitem_metadata_txt else None
@ -304,7 +319,8 @@ def main():
fast_update=args.fast_update, fast_update=args.fast_update,
stories=args.stories, stories=args.stories,
stories_only=args.stories_only, stories_only=args.stories_only,
filter_str=args.only_if) post_filter_str=args.post_filter,
storyitem_filter_str=args.storyitem_filter)
loader.close() loader.close()
except InstaloaderException as err: except InstaloaderException as err:
raise SystemExit("Fatal error: %s" % err) raise SystemExit("Fatal error: %s" % err)

View File

@ -372,7 +372,8 @@ class Instaloader:
def download_stories(self, def download_stories(self,
userids: Optional[List[int]] = None, userids: Optional[List[int]] = None,
fast_update: bool = False, fast_update: bool = False,
filename_target: str = ':stories') -> None: filename_target: str = ':stories',
storyitem_filter: Optional[Callable[[StoryItem], bool]] = None) -> None:
""" """
Download available stories from user followees or all stories of users whose ID are given. Download available stories from user followees or all stories of users whose ID are given.
Does not mark stories as seen. Does not mark stories as seen.
@ -381,6 +382,7 @@ class Instaloader:
:param userids: List of user IDs to be processed in terms of downloading their stories :param userids: List of user IDs to be processed in terms of downloading their stories
:param fast_update: If true, abort when first already-downloaded picture is encountered :param fast_update: If true, abort when first already-downloaded picture is encountered
:param filename_target: Replacement for {target} in dirname_pattern and filename_pattern :param filename_target: Replacement for {target} in dirname_pattern and filename_pattern
:param storyitem_filter: function(storyitem), which returns True if given StoryItem should be downloaded
""" """
if not userids: if not userids:
@ -392,6 +394,9 @@ class Instaloader:
totalcount = user_story.itemcount totalcount = user_story.itemcount
count = 1 count = 1
for item in user_story.get_items(): for item in user_story.get_items():
if storyitem_filter is not None and not storyitem_filter(item):
self.context.log("<{} skipped>".format(item), flush=True)
continue
self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True) self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
count += 1 count += 1
with self.context.error_catcher('Download story from user {}'.format(name)): with self.context.error_catcher('Download story from user {}'.format(name)):
@ -451,7 +456,7 @@ class Instaloader:
@_requires_login @_requires_login
def download_feed_posts(self, max_count: int = None, fast_update: bool = False, def download_feed_posts(self, max_count: int = None, fast_update: bool = False,
filter_func: Optional[Callable[[Post], bool]] = None) -> None: post_filter: Optional[Callable[[Post], bool]] = None) -> None:
""" """
Download pictures from the user's feed. Download pictures from the user's feed.
@ -460,11 +465,11 @@ class Instaloader:
loader = Instaloader() loader = Instaloader()
loader.load_session_from_file('USER') loader.load_session_from_file('USER')
loader.download_feed_posts(max_count=20, fast_update=True, loader.download_feed_posts(max_count=20, fast_update=True,
filter_func=lambda post: post.viewer_has_liked) post_filter=lambda post: post.viewer_has_liked)
:param max_count: Maximum count of pictures to download :param max_count: Maximum count of pictures to download
:param fast_update: If true, abort when first already-downloaded picture is encountered :param fast_update: If true, abort when first already-downloaded picture is encountered
:param filter_func: function(post), which returns True if given picture should be downloaded :param post_filter: function(post), which returns True if given picture should be downloaded
""" """
self.context.log("Retrieving pictures from your feed...") self.context.log("Retrieving pictures from your feed...")
count = 1 count = 1
@ -472,7 +477,7 @@ class Instaloader:
if max_count is not None and count > max_count: if max_count is not None and count > max_count:
break break
name = post.owner_username name = post.owner_username
if filter_func is not None and not filter_func(post): if post_filter is not None and not post_filter(post):
self.context.log("<pic by %s skipped>" % name, flush=True) self.context.log("<pic by %s skipped>" % name, flush=True)
continue continue
self.context.log("[%3i] %s " % (count, name), end="", flush=True) self.context.log("[%3i] %s " % (count, name), end="", flush=True)
@ -484,12 +489,12 @@ class Instaloader:
@_requires_login @_requires_login
def download_saved_posts(self, max_count: int = None, fast_update: bool = False, def download_saved_posts(self, max_count: int = None, fast_update: bool = False,
filter_func: Optional[Callable[[Post], bool]] = None) -> None: post_filter: Optional[Callable[[Post], bool]] = None) -> None:
"""Download user's saved pictures. """Download user's saved pictures.
:param max_count: Maximum count of pictures to download :param max_count: Maximum count of pictures to download
:param fast_update: If true, abort when first already-downloaded picture is encountered :param fast_update: If true, abort when first already-downloaded picture is encountered
:param filter_func: function(post), which returns True if given picture should be downloaded :param post_filter: function(post), which returns True if given picture should be downloaded
""" """
self.context.log("Retrieving saved posts...") self.context.log("Retrieving saved posts...")
count = 1 count = 1
@ -497,7 +502,7 @@ class Instaloader:
if max_count is not None and count > max_count: if max_count is not None and count > max_count:
break break
name = post.owner_username name = post.owner_username
if filter_func is not None and not filter_func(post): if post_filter is not None and not post_filter(post):
self.context.log("<pic by {} skipped".format(name), flush=True) self.context.log("<pic by {} skipped".format(name), flush=True)
continue continue
self.context.log("[{:>3}] {} ".format(count, name), end=str(), flush=True) self.context.log("[{:>3}] {} ".format(count, name), end=str(), flush=True)
@ -534,7 +539,7 @@ class Instaloader:
def download_hashtag(self, hashtag: str, def download_hashtag(self, hashtag: str,
max_count: Optional[int] = None, max_count: Optional[int] = None,
filter_func: Optional[Callable[[Post], bool]] = None, post_filter: Optional[Callable[[Post], bool]] = None,
fast_update: bool = False) -> None: fast_update: bool = False) -> None:
"""Download pictures of one hashtag. """Download pictures of one hashtag.
@ -545,7 +550,7 @@ class Instaloader:
:param hashtag: Hashtag to download, without leading '#' :param hashtag: Hashtag to download, without leading '#'
:param max_count: Maximum count of pictures to download :param max_count: Maximum count of pictures to download
:param filter_func: function(post), which returns True if given picture should be downloaded :param post_filter: function(post), which returns True if given picture should be downloaded
:param fast_update: If true, abort when first already-downloaded picture is encountered :param fast_update: If true, abort when first already-downloaded picture is encountered
""" """
hashtag = hashtag.lower() hashtag = hashtag.lower()
@ -555,7 +560,7 @@ class Instaloader:
if max_count is not None and count > max_count: if max_count is not None and count > max_count:
break break
self.context.log('[{0:3d}] #{1} '.format(count, hashtag), end='', flush=True) self.context.log('[{0:3d}] #{1} '.format(count, hashtag), end='', flush=True)
if filter_func is not None and not filter_func(post): if post_filter is not None and not post_filter(post):
self.context.log('<skipped>') self.context.log('<skipped>')
continue continue
count += 1 count += 1
@ -626,7 +631,8 @@ class Instaloader:
profile_pic: bool = True, profile_pic_only: bool = False, profile_pic: bool = True, profile_pic_only: bool = False,
fast_update: bool = False, fast_update: bool = False,
download_stories: bool = False, download_stories_only: bool = False, download_stories: bool = False, download_stories_only: bool = False,
filter_func: Optional[Callable[[Post], bool]] = None) -> None: post_filter: Optional[Callable[[Post], bool]] = None,
storyitem_filter: Optional[Callable[[StoryItem], bool]] = None) -> None:
"""Download one profile""" """Download one profile"""
# Get profile main page json # Get profile main page json
@ -667,7 +673,8 @@ class Instaloader:
# Download stories, if requested # Download stories, if requested
if download_stories or download_stories_only: if download_stories or download_stories_only:
with self.context.error_catcher("Download stories of {}".format(profile_name)): with self.context.error_catcher("Download stories of {}".format(profile_name)):
self.download_stories(userids=[profile.userid], filename_target=profile_name, fast_update=fast_update) self.download_stories(userids=[profile.userid], filename_target=profile_name, fast_update=fast_update,
storyitem_filter=storyitem_filter)
if download_stories_only: if download_stories_only:
return return
@ -678,7 +685,7 @@ class Instaloader:
for post in profile.get_posts(): for post in profile.get_posts():
self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True) self.context.log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
count += 1 count += 1
if filter_func is not None and not filter_func(post): if post_filter is not None and not post_filter(post):
self.context.log('<skipped>') self.context.log('<skipped>')
continue continue
with self.context.error_catcher('Download profile {}'.format(profile_name)): with self.context.error_catcher('Download profile {}'.format(profile_name)):

View File

@ -36,8 +36,6 @@ class Post:
parameter and exported into JSON files with :option:`--metadata-json`. parameter and exported into JSON files with :option:`--metadata-json`.
""" """
LOGIN_REQUIRING_PROPERTIES = ["viewer_has_liked"]
def __init__(self, context: InstaloaderContext, node: Dict[str, Any], def __init__(self, context: InstaloaderContext, node: Dict[str, Any],
owner_profile: Optional['Profile'] = None): owner_profile: Optional['Profile'] = None):
"""Create a Post instance from a node structure as returned by Instagram. """Create a Post instance from a node structure as returned by Instagram.