Merge branch 'master' into upcoming/v4.5
commit c817d1901a

docs/_static/instaloader.css (vendored), 4 lines changed
@@ -17,7 +17,9 @@ code {
 .highlight pre {
   padding: 0.7em;
   color: #fff; }
-.highlight .c1, .highlight .k, .highlight .kn, .highlight .ow {
+.highlight .c1 {
+  color: #666; }
+.highlight .k, .highlight .kn, .highlight .ow {
   color: #008d06; }
 .highlight .nb, .highlight .ne, .highlight .nf, .highlight .vm {
   color: #f48400; }

docs/_static/instaloader.scss (vendored), 6 lines changed
@@ -36,7 +36,11 @@ code {
   color: #fff;
 }
 
-.c1, .k, .kn, .ow {
+.c1 {
+  color: #666;
+}
+
+.k, .kn, .ow {
   color: $color_instaloader_main
 }
 

@@ -28,8 +28,9 @@ Download Posts in a Specific Period
 -----------------------------------
 
 To only download Instagram pictures (and metadata) that are within a specific
-period, you can play around with :func:`~itertools.dropwhile` and
-:func:`~itertools.takewhile` from :mod:`itertools` like in this snippet.
+period, you can simply use :func:`~itertools.dropwhile` and
+:func:`~itertools.takewhile` from :mod:`itertools` on a generator that returns
+Posts in **exact chronological order**, such as :meth:`Profile.get_posts`.
 
 .. literalinclude:: codesnippets/121_since_until.py
 
@@ -37,6 +38,14 @@ See also :class:`Post`, :meth:`Instaloader.download_post`.
 
 Discussed in :issue:`121`.
 
+The code example with :func:`~itertools.dropwhile` and
+:func:`~itertools.takewhile` makes the assumption that the post iterator returns
+posts in exact chronological order. As discussed in :issue:`666`, the following
+approach fits for an **almost chronological order**, where up to *k* older posts
+are inserted into an otherwise chronological order, such as a Hashtag feed.
+
+.. literalinclude:: codesnippets/666_historical_hashtag_data.py
+
 Likes of a Profile / Ghost Followers
 ------------------------------------
 

@@ -5,13 +5,11 @@ import instaloader
 
 L = instaloader.Instaloader()
 
-posts = instaloader.Hashtag.from_name(L.context, 'urbanphotography').get_posts()
-# or
-# posts = instaloader.Profile.from_username(L.context, PROFILE).get_posts()
+posts = instaloader.Profile.from_username(L.context, "instagram").get_posts()
 
 SINCE = datetime(2015, 5, 1)
 UNTIL = datetime(2015, 3, 1)
 
 for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
     print(post.date)
-    L.download_post(post, '#urbanphotography')
+    L.download_post(post, "instagram")
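
Note: judging from the literalinclude directive above, this hunk updates codesnippets/121_since_until.py so that it iterates a profile instead of a hashtag. Assembled into a runnable whole, the updated snippet reads roughly as follows; this is a sketch, and the itertools imports at the top of the file are assumed from the hunk context.

from datetime import datetime
from itertools import dropwhile, takewhile

import instaloader

L = instaloader.Instaloader()

posts = instaloader.Profile.from_username(L.context, "instagram").get_posts()

SINCE = datetime(2015, 5, 1)  # the more recent bound of the period
UNTIL = datetime(2015, 3, 1)  # the older bound of the period

# get_posts() yields posts newest first, so dropwhile() skips everything
# newer than SINCE and takewhile() stops once a post is older than UNTIL.
for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
    print(post.date)
    L.download_post(post, "instagram")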

docs/codesnippets/666_historical_hashtag_data.py (new file, 30 lines)
@@ -0,0 +1,30 @@
+from datetime import datetime
+import instaloader
+
+L = instaloader.Instaloader()
+
+posts = instaloader.Hashtag.from_name(L.context, "urbanphotography").get_posts()
+
+SINCE = datetime(2020, 5, 10)  # further from today, inclusive
+UNTIL = datetime(2020, 5, 11)  # closer to today, not inclusive
+
+k = 0  # initiate k
+k_list = []  # uncomment this to tune k
+
+for post in posts:
+    postdate = post.date
+
+    if postdate > UNTIL:
+        continue
+    elif postdate <= SINCE:
+        k += 1
+        if k == 50:
+            break
+        else:
+            continue
+    else:
+        L.download_post(post, "#urbanphotography")
+        k = 0  # set k to 0
+        # if you want to tune k, uncomment below to get your k max
+        #k_list.append(k)
+        #max(k_list)
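
The snippet above gives up after k == 50 consecutive posts older than SINCE, on the assumption that a hashtag feed is almost chronological with at most that many out-of-order stragglers; the k_list lines are there to measure a suitable cutoff for a given feed. A possible way to package the same logic as a reusable helper is sketched below; posts_in_period is a hypothetical function, not part of this commit.

from datetime import datetime

import instaloader


def posts_in_period(posts, since, until, patience=50):
    """Yield posts with since < post.date <= until from an almost
    chronological post iterator, mirroring 666_historical_hashtag_data.py."""
    k = 0  # consecutive posts already older than `since`
    for post in posts:
        if post.date > until:
            continue      # still newer than the window, keep scanning
        elif post.date <= since:
            k += 1        # an older post inserted out of order
            if k == patience:
                break     # assume the feed has left the window for good
        else:
            k = 0         # back inside the window, reset the counter
            yield post


L = instaloader.Instaloader()
hashtag_posts = instaloader.Hashtag.from_name(L.context, "urbanphotography").get_posts()
for post in posts_in_period(hashtag_posts, datetime(2020, 5, 10), datetime(2020, 5, 11)):
    L.download_post(post, "#urbanphotography")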

@@ -1,7 +1,7 @@
 """Download pictures (or videos) along with their captions and other metadata from Instagram."""
 
 
-__version__ = '4.4.4'
+__version__ = '4.4.5'
 
 
 try:

@@ -69,7 +69,6 @@ class Post:
         self._node = node
         self._owner_profile = owner_profile
         self._full_metadata_dict = None  # type: Optional[Dict[str, Any]]
-        self._rhx_gis_str = None  # type: Optional[str]
         self._location = None  # type: Optional[PostLocation]
         self._iphone_struct_ = None
         if 'iphone_struct' in node:

@@ -142,9 +141,11 @@ class Post:
 
     def _obtain_metadata(self):
         if not self._full_metadata_dict:
-            pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={})
-            self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
-            self._rhx_gis_str = pic_json.get('rhx_gis')
+            pic_json = self._context.graphql_query(
+                '2b0673e0dc4580674a88d426fe00ea90',
+                {'shortcode': self.shortcode}
+            )
+            self._full_metadata_dict = pic_json['data']['shortcode_media']
             if self._full_metadata_dict is None:
                 # issue #449
                 self._context.error("Fetching Post metadata failed (issue #449). "
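
Post metadata is now fetched through a GraphQL query keyed only by the post's shortcode, instead of scraping the post page JSON; the rhx_gis value that the old page JSON provided is dropped altogether (see the removals below). For calling code the behaviour stays the same, roughly as in this sketch, where SHORTCODE is a placeholder.

import instaloader

L = instaloader.Instaloader()

SHORTCODE = "..."  # placeholder for a real post shortcode

# Creating a Post from a bare shortcode and reading its properties triggers
# _obtain_metadata(), which now issues the GraphQL shortcode query shown in
# the hunk above; the old rhx_gis handling disappears transparently.
post = instaloader.Post.from_shortcode(L.context, SHORTCODE)
print(post.date_utc, post.likes)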

@@ -161,11 +162,6 @@ class Post:
         assert self._full_metadata_dict is not None
         return self._full_metadata_dict
 
-    @property
-    def _rhx_gis(self) -> Optional[str]:
-        self._obtain_metadata()
-        return self._rhx_gis_str
-
     @property
     def _iphone_struct(self) -> Dict[str, Any]:
         if not self._context.is_logged_in:

@@ -392,7 +388,7 @@ class Post:
                                created_at_utc=datetime.utcfromtimestamp(node['created_at']),
                                text=node['text'],
                                owner=Profile(self._context, node['owner']),
-                               likes_count=node['edge_liked_by']['count'])
+                               likes_count=node.get('edge_liked_by', {}).get('count', 0))
 
         def _postcommentanswers(node):
             if 'edge_threaded_comments' not in node:

@@ -418,14 +414,9 @@ class Post:
         if self.comments == 0:
             # Avoid doing additional requests if there are no comments
             return
-        try:
-            comment_edges = self._field('edge_media_to_parent_comment', 'edges')
-            answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
-            threaded_comments_available = True
-        except KeyError:
-            comment_edges = self._field('edge_media_to_comment', 'edges')
-            answers_count = 0
-            threaded_comments_available = False
+        comment_edges = self._field('edge_media_to_comment', 'edges')
+        answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])
 
         if self.comments == len(comment_edges) + answers_count:
             # If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them

@@ -433,14 +424,10 @@ class Post:
             return
         yield from (_postcomment(node) for node in
                     self._context.graphql_node_list(
-                        "97b41c52301f77ce508f55e66d17620e" if threaded_comments_available
-                        else "f0986789a5c5d17c2400faebf16efd0d",
+                        "97b41c52301f77ce508f55e66d17620e",
                         {'shortcode': self.shortcode},
                         'https://www.instagram.com/p/' + self.shortcode + '/',
-                        lambda d:
-                        d['data']['shortcode_media'][
-                            'edge_media_to_parent_comment' if threaded_comments_available else 'edge_media_to_comment'],
-                        self._rhx_gis))
+                        lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment']))
 
     def get_likes(self) -> Iterator['Profile']:
         """Iterate over all likes of the post. A :class:`Profile` instance of each likee is yielded."""
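
get_comments() now always uses the parent-comment GraphQL query; the try/except fallback to the plain comment edge, the threaded_comments_available switch, and the rhx_gis argument are gone. A minimal usage sketch, printing the fields that _postcomment() assembles above (SHORTCODE is again a placeholder):

import instaloader

L = instaloader.Instaloader()

SHORTCODE = "..."  # placeholder for a real post shortcode

post = instaloader.Post.from_shortcode(L.context, SHORTCODE)
for comment in post.get_comments():
    # owner, likes_count and text are the fields built by _postcomment()
    print(comment.owner.username, comment.likes_count, comment.text)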

@@ -455,8 +442,7 @@ class Post:
         yield from (Profile(self._context, node) for node in
                     self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
                                                     'https://www.instagram.com/p/' + self.shortcode + '/',
-                                                    lambda d: d['data']['shortcode_media']['edge_liked_by'],
-                                                    self._rhx_gis))
+                                                    lambda d: d['data']['shortcode_media']['edge_liked_by']))
 
     @property
     def is_sponsored(self) -> bool:

@@ -537,7 +523,6 @@ class Profile:
         self._has_public_story = None  # type: Optional[bool]
         self._node = node
         self._has_full_metadata = False
-        self._rhx_gis = None
         self._iphone_struct_ = None
         if 'iphone_struct' in node:
             # if loaded from JSON with load_structure_from_file()

@@ -599,10 +584,9 @@ class Profile:
     def _obtain_metadata(self):
         try:
             if not self._has_full_metadata:
-                metadata = self._context.get_json('{}/'.format(self.username), params={})
+                metadata = self._context.get_json('{}/feed/'.format(self.username), params={})
                 self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
                 self._has_full_metadata = True
-                self._rhx_gis = metadata.get('rhx_gis')
         except (QueryReturnedNotFoundException, KeyError) as err:
             top_search_results = TopSearchResults(self._context, self.username)
             similar_profiles = [profile.username for profile in top_search_results.get_profiles()]
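
Profile metadata is now requested from the profile's /feed/ page, and the rhx_gis value is no longer remembered. Nothing changes for callers; roughly:

import instaloader

L = instaloader.Instaloader()

# Profile.from_username() triggers _obtain_metadata(), i.e. the
# get_json('{}/feed/') call from the hunk above; an unknown username
# raises ProfileNotExistsException with similar-profile suggestions.
profile = instaloader.Profile.from_username(L.context, "instagram")
print(profile.userid, profile.followers, profile.biography)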

@@ -735,8 +719,7 @@ class Profile:
                                                'include_reel': False, 'include_suggested_users': False,
                                                'include_logged_out_extras': True,
                                                'include_highlight_reels': False},
-                                              'https://www.instagram.com/{}/'.format(self.username),
-                                              self._rhx_gis)
+                                              'https://www.instagram.com/{}/'.format(self.username))
             self._has_public_story = data['data']['user']['has_public_story']
         assert self._has_public_story is not None
         return self._has_public_story

@@ -795,8 +778,7 @@ class Profile:
                         {'id': self.userid},
                         'https://www.instagram.com/{0}/'.format(self.username),
                         lambda d: d['data']['user']['edge_owner_to_timeline_media'],
-                        self._rhx_gis,
-                        self._metadata('edge_owner_to_timeline_media')))
+                        first_data=self._metadata('edge_owner_to_timeline_media')))
 
     def get_saved_posts(self) -> Iterator[Post]:
         """Get Posts that are marked as saved by the user."""

@@ -810,8 +792,7 @@ class Profile:
                         {'id': self.userid},
                         'https://www.instagram.com/{0}/'.format(self.username),
                         lambda d: d['data']['user']['edge_saved_media'],
-                        self._rhx_gis,
-                        self._metadata('edge_saved_media')))
+                        first_data=self._metadata('edge_saved_media')))
 
     def get_tagged_posts(self) -> Iterator[Post]:
         """Retrieve all posts where a profile is tagged.

@@ -822,8 +803,7 @@ class Profile:
         self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7",
                                         {'id': self.userid},
                                         'https://www.instagram.com/{0}/'.format(self.username),
-                                        lambda d: d['data']['user']['edge_user_to_photos_of_you'],
-                                        self._rhx_gis))
+                                        lambda d: d['data']['user']['edge_user_to_photos_of_you']))
 
     def get_igtv_posts(self) -> Iterator[Post]:
         """Retrieve all IGTV posts.
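
The post-iterator methods (get_posts, get_saved_posts, get_tagged_posts, and get_igtv_posts below) keep their query hashes and merely stop passing rhx_gis; where a first page of data is already contained in the profile metadata it is now handed over via the explicit first_data keyword. Calling code is unaffected, for example in this sketch (the target name is chosen arbitrarily here):

import instaloader

L = instaloader.Instaloader()

profile = instaloader.Profile.from_username(L.context, "instagram")
for post in profile.get_tagged_posts():
    # download every post the profile is tagged in into its own directory
    L.download_post(post, target=profile.username + "_tagged")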

@@ -835,8 +815,7 @@ class Profile:
                         {'id': self.userid},
                         'https://www.instagram.com/{0}/channel/'.format(self.username),
                         lambda d: d['data']['user']['edge_felix_video_timeline'],
-                        self._rhx_gis,
-                        self._metadata('edge_felix_video_timeline')))
+                        first_data=self._metadata('edge_felix_video_timeline')))
 
     def get_followers(self) -> Iterator['Profile']:
         """

@@ -850,8 +829,7 @@ class Profile:
         self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a",
                                         {'id': str(self.userid)},
                                         'https://www.instagram.com/' + self.username + '/',
-                                        lambda d: d['data']['user']['edge_followed_by'],
-                                        self._rhx_gis))
+                                        lambda d: d['data']['user']['edge_followed_by']))
 
     def get_followees(self) -> Iterator['Profile']:
         """

@@ -865,8 +843,7 @@ class Profile:
         self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f",
                                         {'id': str(self.userid)},
                                         'https://www.instagram.com/' + self.username + '/',
-                                        lambda d: d['data']['user']['edge_follow'],
-                                        self._rhx_gis))
+                                        lambda d: d['data']['user']['edge_follow']))
 
     def get_similar_accounts(self) -> Iterator['Profile']:
         """
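
get_followers() and get_followees() likewise just lose the rhx_gis argument. Tying this back to the "Likes of a Profile / Ghost Followers" docs section touched above, here is a hedged sketch that lists followers who have not liked any of the profile's posts; the login credentials are placeholders, and a logged-in session is needed to list followers.

import instaloader

L = instaloader.Instaloader()
L.login("your_username", "your_password")  # placeholders; login is required

profile = instaloader.Profile.from_username(L.context, "your_username")

likers = set()
for post in profile.get_posts():
    likers.update(liker.username for liker in post.get_likes())

ghosts = {follower.username for follower in profile.get_followers()} - likers
print("Ghost followers:", ghosts)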

@@ -881,8 +858,8 @@ class Profile:
         yield from (Profile(self._context, edge["node"]) for edge in
                     self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7",
                                                 {"user_id": str(self.userid), "include_chaining": True},
-                                                "https://www.instagram.com/{0}/".format(self.username),
-                                                self._rhx_gis)["data"]["user"]["edge_chaining"]["edges"])
+                                                "https://www.instagram.com/{0}/"
+                                                .format(self.username))["data"]["user"]["edge_chaining"]["edges"])
 
 
 class StoryItem: