From c9e4f685971f0d7ec64095ade2abe69a55353ca3 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sun, 19 Apr 2020 10:31:58 +0200 Subject: [PATCH] Download Hashtag profilepic and save metadata JSON --- instaloader/__main__.py | 3 ++- instaloader/instaloader.py | 39 +++++++++++++++++++++++++++++++------- instaloader/structures.py | 6 +++--- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/instaloader/__main__.py b/instaloader/__main__.py index 6297c99..102ed8d 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -144,7 +144,8 @@ def _main(instaloader: Instaloader, targetlist: List[str], profiles.add(followee) elif target[0] == '#': instaloader.download_hashtag(hashtag=target[1:], max_count=max_count, fast_update=fast_update, - post_filter=post_filter) + post_filter=post_filter, + profile_pic=download_profile_pic, posts=download_posts) elif target[0] == '-': instaloader.download_post(Post.from_shortcode(instaloader.context, target[1:]), target) elif target[0] == "%": diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 3357bbd..9b9574b 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -358,7 +358,7 @@ class Instaloader: self.context.log('geo', end=' ', flush=True) @_retry_on_connection_error - def download_title_pic(self, url: str, target: Union[str, Path], name_suffix: str, owner_profile: Profile, + def download_title_pic(self, url: str, target: Union[str, Path], name_suffix: str, owner_profile: Optional[Profile], _attempt: int = 1) -> None: """Downloads and saves a picture that does not have an association with a Post or StoryItem, such as a Profile picture or a Highlight cover picture. Modification time is taken from the HTTP response headers. 
@@ -380,7 +380,8 @@ class Instaloader: pic_extension = 'jpg' if ((format_string_contains_key(self.dirname_pattern, 'profile') or format_string_contains_key(self.dirname_pattern, 'target'))): - filename = '{0}/{1}_{2}.{3}'.format(self.dirname_pattern.format(profile=owner_profile.username.lower(), + profile_str = owner_profile.username.lower() if owner_profile is not None else target + filename = '{0}/{1}_{2}.{3}'.format(self.dirname_pattern.format(profile=profile_str, target=target), pic_identifier, name_suffix, pic_extension) else: @@ -408,6 +409,12 @@ class Instaloader: .. versionadded:: 4.3""" self.download_title_pic(highlight.cover_url, target, 'cover', highlight.owner_profile) + def download_hashtag_profilepic(self, hashtag: Hashtag) -> None: + """Downloads and saves the profile picture of a Hashtag. + + .. versionadded:: 4.4""" + self.download_title_pic(hashtag.profile_pic_url, '#' + hashtag.name, 'profile_pic', None) + @_requires_login def save_session_to_file(self, filename: Optional[str] = None) -> None: """Saves internally stored :class:`requests.Session` object. @@ -888,7 +895,9 @@ class Instaloader: def download_hashtag(self, hashtag: Union[Hashtag, str], max_count: Optional[int] = None, post_filter: Optional[Callable[[Post], bool]] = None, - fast_update: bool = False) -> None: + fast_update: bool = False, + profile_pic: bool = True, + posts: bool = True) -> None: """Download pictures of one hashtag. To download the last 30 pictures with hashtag #cat, do:: @@ -900,14 +909,30 @@ class Instaloader: :param max_count: Maximum count of pictures to download :param post_filter: function(post), which returns True if given picture should be downloaded :param fast_update: If true, abort when first already-downloaded picture is encountered + :param profile_pic: not :option:`--no-profile-pic`. + :param posts: not :option:`--no-posts`. + + .. versionchanged:: 4.4 + Add parameters `profile_pic` and `posts`. 
""" if isinstance(hashtag, str): with self.context.error_catcher("Get hashtag #{}".format(hashtag)): hashtag = Hashtag.from_name(self.context, hashtag) - assert isinstance(hashtag, Hashtag) - self.context.log("Retrieving pictures with hashtag #{}...".format(hashtag.name)) - self.posts_download_loop(hashtag.get_posts(), "#" + hashtag.name, fast_update, post_filter, - max_count=max_count) + if not isinstance(hashtag, Hashtag): + return + target = "#" + hashtag.name + if profile_pic: + with self.context.error_catcher("Download profile picture of {}".format(target)): + self.download_hashtag_profilepic(hashtag) + if posts: + self.context.log("Retrieving pictures with hashtag #{}...".format(hashtag.name)) + self.posts_download_loop(hashtag.get_posts(), target, fast_update, post_filter, + max_count=max_count) + if self.save_metadata: + json_filename = '{0}/{1}'.format(self.dirname_pattern.format(profile=target, + target=target), + target) + self.save_metadata_json(json_filename, hashtag) def download_tagged(self, profile: Profile, fast_update: bool = False, target: Optional[str] = None, diff --git a/instaloader/structures.py b/instaloader/structures.py index 9f40cb9..8462a21 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -1169,8 +1169,8 @@ class Hashtag: @property def name(self): - """Hashtag name, without preceeding '#'""" - return self._node["name"] + """Hashtag name lowercased, without preceding '#'""" + return self._node["name"].lower() def _query(self, params): return self._context.get_json("explore/tags/{0}/".format(self.name), @@ -1189,7 +1189,7 @@ class Hashtag: def __eq__(self, other: object) -> bool: if isinstance(other, Hashtag): - return self.name.lower() == other.name.lower() + return self.name == other.name return NotImplemented def __hash__(self) -> int: