Allow graphql_node_list() to accept an already-fetched first page of results
This commit is contained in:
parent
25b8165547
commit
020830d591
@ -294,18 +294,19 @@ class InstaloaderContext:
|
||||
|
||||
def graphql_node_list(self, query_hash: str, query_variables: Dict[str, Any],
|
||||
query_referer: Optional[str],
|
||||
edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
|
||||
edge_extractor: Callable[[Dict[str, Any]], Dict[str, Any]],
|
||||
first_data: Optional[Dict[str, Any]] = None) -> Iterator[Dict[str, Any]]:
|
||||
"""Retrieve a list of GraphQL nodes."""
|
||||
query_variables['first'] = GRAPHQL_PAGE_LENGTH
|
||||
data = self.graphql_query(query_hash, query_variables, query_referer)
|
||||
while True:
|
||||
edge_struct = edge_extractor(data)
|
||||
yield from [edge['node'] for edge in edge_struct['edges']]
|
||||
if edge_struct['page_info']['has_next_page']:
|
||||
query_variables['after'] = edge_struct['page_info']['end_cursor']
|
||||
data = self.graphql_query(query_hash, query_variables, query_referer)
|
||||
else:
|
||||
break
|
||||
if first_data:
|
||||
data = first_data
|
||||
else:
|
||||
data = edge_extractor(self.graphql_query(query_hash, query_variables, query_referer))
|
||||
yield from (edge['node'] for edge in data['edges'])
|
||||
while data['page_info']['has_next_page']:
|
||||
query_variables['after'] = data['page_info']['end_cursor']
|
||||
data = edge_extractor(self.graphql_query(query_hash, query_variables, query_referer))
|
||||
yield from (edge['node'] for edge in data['edges'])
|
||||
|
||||
def get_and_write_raw(self, url: str, filename: str, _attempt=1) -> None:
|
||||
"""Downloads raw data.
|
||||
|
@ -4,7 +4,7 @@ from datetime import datetime
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
|
||||
from .exceptions import *
|
||||
from .instaloadercontext import GRAPHQL_PAGE_LENGTH, InstaloaderContext
|
||||
from .instaloadercontext import InstaloaderContext
|
||||
|
||||
|
||||
def shortcode_to_mediaid(code: str) -> int:
|
||||
@ -248,6 +248,7 @@ class Post:
|
||||
if self.comments == len(comment_edges):
|
||||
# If the Post's metadata already contains all comments, don't do GraphQL requests to obtain them
|
||||
yield from (comment['node'] for comment in comment_edges)
|
||||
return
|
||||
yield from self._context.graphql_node_list("33ba35852cb50da46f5b5e889df7d159",
|
||||
{'shortcode': self.shortcode},
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
@ -266,6 +267,7 @@ class Post:
|
||||
if self.likes == len(likes_edges):
|
||||
# If the Post's metadata already contains all likes, don't do GraphQL requests to obtain them
|
||||
yield from (like['node'] for like in likes_edges)
|
||||
return
|
||||
yield from self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
lambda d: d['data']['shortcode_media']['edge_liked_by'])
|
||||
@ -432,23 +434,12 @@ class Profile:
|
||||
|
||||
def get_posts(self) -> Iterator[Post]:
|
||||
"""Retrieve all posts from a profile."""
|
||||
yield from (Post(self._context, edge['node'], owner_profile=self)
|
||||
for edge in self._metadata('edge_owner_to_timeline_media', 'edges'))
|
||||
has_next_page = self._metadata('edge_owner_to_timeline_media', 'page_info', 'has_next_page')
|
||||
end_cursor = self._metadata('edge_owner_to_timeline_media', 'page_info', 'end_cursor')
|
||||
while has_next_page:
|
||||
# We do not use self.graphql_node_list() here, because profile_metadata
|
||||
# lets us obtain the first 12 nodes 'for free'
|
||||
data = self._context.graphql_query("472f257a40c653c64c666ce877d59d2b",
|
||||
{'id': self.userid,
|
||||
'first': GRAPHQL_PAGE_LENGTH,
|
||||
'after': end_cursor},
|
||||
'https://www.instagram.com/{0}/'.format(self.username))
|
||||
media = data['data']['user']['edge_owner_to_timeline_media']
|
||||
yield from (Post(self._context, edge['node'], owner_profile=self)
|
||||
for edge in media['edges'])
|
||||
has_next_page = media['page_info']['has_next_page']
|
||||
end_cursor = media['page_info']['end_cursor']
|
||||
yield from (Post(self._context, node, self) for node in
|
||||
self._context.graphql_node_list("472f257a40c653c64c666ce877d59d2b",
|
||||
{'id': self.userid},
|
||||
'https://www.instagram.com/{0}/'.format(self.username),
|
||||
lambda d: d['data']['user']['edge_owner_to_timeline_media'],
|
||||
self._metadata('edge_owner_to_timeline_media')))
|
||||
|
||||
def get_saved_posts(self) -> Iterator[Post]:
|
||||
"""Get Posts that are marked as saved by the user."""
|
||||
@ -456,21 +447,12 @@ class Profile:
|
||||
if self.username != self._context.username:
|
||||
raise LoginRequiredException("--login={} required to get that profile's saved posts.".format(self.username))
|
||||
|
||||
yield from (Post(self._context, edge['node'])
|
||||
for edge in self._metadata('edge_saved_media', 'edges'))
|
||||
has_next_page = self._metadata('edge_saved_media', 'page_info', 'has_next_page')
|
||||
end_cursor = self._metadata('edge_saved_media', 'page_info', 'end_cursor')
|
||||
while has_next_page:
|
||||
data = self._context.graphql_query("f883d95537fbcd400f466f63d42bd8a1",
|
||||
{'id': self.userid,
|
||||
'first': GRAPHQL_PAGE_LENGTH,
|
||||
'after': end_cursor},
|
||||
'https://www.instagram.com/{0}/'.format(self.username))
|
||||
media = data['data']['user']['edge_saved_media']
|
||||
yield from (Post(self._context, edge['node'])
|
||||
for edge in media['edges'])
|
||||
has_next_page = media['page_info']['has_next_page']
|
||||
end_cursor = media['page_info']['end_cursor']
|
||||
yield from (Post(self._context, node) for node in
|
||||
self._context.graphql_node_list("f883d95537fbcd400f466f63d42bd8a1",
|
||||
{'id': self.userid},
|
||||
'https://www.instagram.com/{0}/'.format(self.username),
|
||||
lambda d: d['data']['user']['edge_saved_media'],
|
||||
self._metadata('edge_saved_media')))
|
||||
|
||||
|
||||
class StoryItem:
|
||||
|
Loading…
Reference in New Issue
Block a user