Fix download of hashtags and locations
Fixes #1080, fixes #1129, closes #1240.
This commit is contained in:
parent
d6fd4c560c
commit
5d18857695
@ -22,6 +22,7 @@ from .exceptions import *
|
|||||||
from .instaloadercontext import InstaloaderContext, RateController
|
from .instaloadercontext import InstaloaderContext, RateController
|
||||||
from .lateststamps import LatestStamps
|
from .lateststamps import LatestStamps
|
||||||
from .nodeiterator import NodeIterator, resumable_iteration
|
from .nodeiterator import NodeIterator, resumable_iteration
|
||||||
|
from .sectioniterator import SectionIterator
|
||||||
from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
|
from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
|
||||||
load_structure_from_file, save_structure_to_file, PostSidecarNode, TitlePic)
|
load_structure_from_file, save_structure_to_file, PostSidecarNode, TitlePic)
|
||||||
|
|
||||||
@ -1088,18 +1089,12 @@ class Instaloader:
|
|||||||
.. versionchanged:: 4.2.9
|
.. versionchanged:: 4.2.9
|
||||||
Require being logged in (as required by Instagram)
|
Require being logged in (as required by Instagram)
|
||||||
"""
|
"""
|
||||||
has_next_page = True
|
yield from SectionIterator(
|
||||||
end_cursor = None
|
self.context,
|
||||||
while has_next_page:
|
lambda d: d["native_location_data"]["recent"],
|
||||||
if end_cursor:
|
lambda m: Post.from_iphone_struct(self.context, m),
|
||||||
params = {'__a': 1, 'max_id': end_cursor}
|
f"explore/locations/{location}/",
|
||||||
else:
|
)
|
||||||
params = {'__a': 1}
|
|
||||||
location_data = self.context.get_json('explore/locations/{0}/'.format(location),
|
|
||||||
params)['graphql']['location']['edge_location_to_media']
|
|
||||||
yield from (Post(self.context, edge['node']) for edge in location_data['edges'])
|
|
||||||
has_next_page = location_data['page_info']['has_next_page']
|
|
||||||
end_cursor = location_data['page_info']['end_cursor']
|
|
||||||
|
|
||||||
@_requires_login
|
@_requires_login
|
||||||
def download_location(self, location: str,
|
def download_location(self, location: str,
|
||||||
|
46
instaloader/sectioniterator.py
Normal file
46
instaloader/sectioniterator.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
from typing import Any, Callable, Dict, Iterator, Optional, TypeVar
|
||||||
|
|
||||||
|
from .instaloadercontext import InstaloaderContext
|
||||||
|
|
||||||
|
T = TypeVar('T')


class SectionIterator(Iterator[T]):
    """Iterator for the new 'sections'-style responses.

    A response page, as returned by ``sections_extractor``, is a dict with a ``sections`` list whose items hold
    their media under ``['layout_content']['medias'][i]['media']``, a ``more_available`` flag, and, when more
    pages exist, a ``next_max_id`` cursor that is sent as the ``max_id`` parameter of the follow-up request.

    :param context: Object whose ``get_json(path, params=...)`` method performs the requests.
    :param sections_extractor: Maps the JSON returned by ``get_json`` to the page dict described above.
    :param media_wrapper: Converts one raw ``media`` dict into the item that the iterator yields.
    :param query_path: Path passed to ``get_json`` for every page request.
    :param first_data: Already available first page; if omitted (or empty), the first page is requested.

    .. versionadded:: 4.9"""
    def __init__(self,
                 context: "InstaloaderContext",
                 sections_extractor: Callable[[Dict[str, Any]], Dict[str, Any]],
                 media_wrapper: Callable[[Dict], T],
                 query_path: str,
                 first_data: Optional[Dict[str, Any]] = None):
        self._context = context
        self._sections_extractor = sections_extractor
        self._media_wrapper = media_wrapper
        self._query_path = query_path
        self._data = first_data or self._query()
        # Position inside the current page: _page_index selects the section,
        # _section_index selects the media within that section.
        self._page_index = 0
        self._section_index = 0

    def __iter__(self):
        return self

    def _query(self, max_id: Optional[str] = None) -> Dict[str, Any]:
        """Request one page (the first one if ``max_id`` is None) and return its extracted page dict."""
        pagination_variables = {"max_id": max_id} if max_id is not None else {}
        return self._sections_extractor(
            self._context.get_json(self._query_path, params={"__a": 1, **pagination_variables})
        )

    def __next__(self) -> T:
        """Return the next wrapped media, requesting further pages as needed.

        :raises StopIteration: when the current page is exhausted and reports no more pages."""
        while True:
            sections = self._data['sections']
            while self._page_index < len(sections):
                medias = sections[self._page_index]['layout_content']['medias']
                if self._section_index < len(medias):
                    media = medias[self._section_index]['media']
                    self._section_index += 1
                    return self._media_wrapper(media)
                # Section exhausted (or empty to begin with): continue with the next section
                # instead of indexing past the end of its media list.
                self._section_index = 0
                self._page_index += 1
            if not self._data['more_available']:
                raise StopIteration()
            # Loop rather than recurse, so that consecutive pages without media do not grow the call stack.
            self._page_index, self._section_index, self._data = 0, 0, self._query(self._data["next_max_id"])
|
@ -3,7 +3,9 @@ import lzma
|
|||||||
import re
|
import re
|
||||||
from base64 import b64decode, b64encode
|
from base64 import b64decode, b64encode
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
|
from contextlib import suppress
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from itertools import islice
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
|
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
|
||||||
|
|
||||||
@ -11,6 +13,7 @@ from . import __version__
|
|||||||
from .exceptions import *
|
from .exceptions import *
|
||||||
from .instaloadercontext import InstaloaderContext
|
from .instaloadercontext import InstaloaderContext
|
||||||
from .nodeiterator import FrozenNodeIterator, NodeIterator
|
from .nodeiterator import FrozenNodeIterator, NodeIterator
|
||||||
|
from .sectioniterator import SectionIterator
|
||||||
|
|
||||||
PostSidecarNode = namedtuple('PostSidecarNode', ['is_video', 'display_url', 'video_url'])
|
PostSidecarNode = namedtuple('PostSidecarNode', ['is_video', 'display_url', 'video_url'])
|
||||||
PostSidecarNode.__doc__ = "Item of a Sidecar Post."
|
PostSidecarNode.__doc__ = "Item of a Sidecar Post."
|
||||||
@ -89,6 +92,41 @@ class Post:
|
|||||||
"""Create a post object from a given mediaid"""
|
"""Create a post object from a given mediaid"""
|
||||||
return cls.from_shortcode(context, Post.mediaid_to_shortcode(mediaid))
|
return cls.from_shortcode(context, Post.mediaid_to_shortcode(mediaid))
|
||||||
|
|
||||||
|
@classmethod
def from_iphone_struct(cls, context: InstaloaderContext, media: Dict[str, Any]):
    """Create a post from a given iphone_struct.

    The fields of ``media`` are translated into a node dict using the key names the rest of this class reads
    (``shortcode``, ``__typename``, ``display_url``, ...); the unmodified struct is kept under ``iphone_struct``.

    :param context: Context that is handed on to the created objects.
    :param media: The iphone_struct of the post. ``code``, ``pk``, ``media_type``, ``taken_at``, ``has_liked``
       and ``like_count`` are mandatory; image, video and carousel information is optional.
    :raises KeyError: if a mandatory field is missing or ``media_type`` is not one of 1, 2 or 8.

    .. versionadded:: 4.9"""
    media_types = {
        1: "GraphImage",
        2: "GraphVideo",
        8: "GraphSidecar",
    }
    typename = media_types[media["media_type"]]
    caption = media.get("caption")
    fake_node = {
        "shortcode": media["code"],
        "id": media["pk"],
        "__typename": typename,
        "is_video": typename == "GraphVideo",
        "date": media["taken_at"],
        "caption": caption.get("text") if caption is not None else None,
        "title": media.get("title"),
        "viewer_has_liked": media["has_liked"],
        "edge_media_preview_like": {"count": media["like_count"]},
        "iphone_struct": media,
    }
    # The parts below are optional. Besides a missing key (KeyError), a field may be present but empty
    # (IndexError on [0] / [-1]) or null (TypeError); in each case the entry is left out of the node
    # instead of failing the construction of the whole post.
    optional_field_errors = (KeyError, IndexError, TypeError)
    with suppress(*optional_field_errors):
        fake_node["display_url"] = media['image_versions2']['candidates'][0]['url']
    with suppress(*optional_field_errors):
        fake_node["video_url"] = media['video_versions'][-1]['url']
        fake_node["video_duration"] = media["video_duration"]
        fake_node["video_view_count"] = media["view_count"]
    with suppress(*optional_field_errors):
        fake_node["edge_sidecar_to_children"] = {"edges": [{"node": {
            "display_url": node['image_versions2']['candidates'][0]['url'],
            "is_video": media_types[node["media_type"]] == "GraphVideo",
        }} for node in media["carousel_media"]]}
    # The owner profile is only built when the struct carries the user; otherwise None is passed on.
    return cls(context, fake_node, Profile.from_iphone_struct(context, media["user"]) if "user" in media else None)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def shortcode_to_mediaid(code: str) -> int:
|
def shortcode_to_mediaid(code: str) -> int:
|
||||||
if len(code) > 11:
|
if len(code) > 11:
|
||||||
@ -665,6 +703,20 @@ class Profile:
|
|||||||
context.profile_id_cache[profile_id] = profile
|
context.profile_id_cache[profile_id] = profile
|
||||||
return profile
|
return profile
|
||||||
|
|
||||||
|
@classmethod
def from_iphone_struct(cls, context: InstaloaderContext, media: Dict[str, Any]):
    """Create a profile from a given iphone_struct.

    :param context: Context that is handed on to the created profile.
    :param media: The iphone_struct describing the user; ``pk``, ``username``, ``is_private``, ``full_name``
       and ``profile_pic_url`` must be present.
    :raises KeyError: if one of these fields is missing.

    .. versionadded:: 4.9"""
    # (key in the profile node, key in the iphone_struct), in the order the node is filled.
    field_map = (
        ("id", "pk"),
        ("username", "username"),
        ("is_private", "is_private"),
        ("full_name", "full_name"),
        ("profile_pic_url_hd", "profile_pic_url"),
    )
    node = {node_key: media[struct_key] for node_key, struct_key in field_map}
    # Keep the unmodified struct alongside the translated fields.
    node["iphone_struct"] = media
    return cls(context, node)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def own_profile(cls, context: InstaloaderContext):
|
def own_profile(cls, context: InstaloaderContext):
|
||||||
"""Return own profile if logged-in.
|
"""Return own profile if logged-in.
|
||||||
@ -1359,6 +1411,9 @@ class Hashtag:
|
|||||||
L.download_post(post, target="#"+hashtag.name)
|
L.download_post(post, target="#"+hashtag.name)
|
||||||
|
|
||||||
Also, this class implements == and is hashable.
|
Also, this class implements == and is hashable.
|
||||||
|
|
||||||
|
.. versionchanged:: 4.9
|
||||||
|
Removed ``get_related_tags()`` and ``is_top_media_only`` as these features were removed from Instagram.
|
||||||
"""
|
"""
|
||||||
def __init__(self, context: InstaloaderContext, node: Dict[str, Any]):
|
def __init__(self, context: InstaloaderContext, node: Dict[str, Any]):
|
||||||
assert "name" in node
|
assert "name" in node
|
||||||
@ -1387,8 +1442,8 @@ class Hashtag:
|
|||||||
return self._node["name"].lower()
|
return self._node["name"].lower()
|
||||||
|
|
||||||
def _query(self, params):
|
def _query(self, params):
|
||||||
return self._context.get_json("explore/tags/{0}/".format(self.name),
|
json_response = self._context.get_json("explore/tags/{0}/".format(self.name), params)
|
||||||
params)["graphql"]["hashtag"]
|
return json_response["graphql"]["hashtag"] if "graphql" in json_response else json_response["data"]
|
||||||
|
|
||||||
def _obtain_metadata(self):
|
def _obtain_metadata(self):
|
||||||
if not self._has_full_metadata:
|
if not self._has_full_metadata:
|
||||||
@ -1399,7 +1454,9 @@ class Hashtag:
|
|||||||
json_node = self._node.copy()
|
json_node = self._node.copy()
|
||||||
# remove posts
|
# remove posts
|
||||||
json_node.pop("edge_hashtag_to_top_posts", None)
|
json_node.pop("edge_hashtag_to_top_posts", None)
|
||||||
|
json_node.pop("top", None)
|
||||||
json_node.pop("edge_hashtag_to_media", None)
|
json_node.pop("edge_hashtag_to_media", None)
|
||||||
|
json_node.pop("recent", None)
|
||||||
return json_node
|
return json_node
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
@ -1435,30 +1492,33 @@ class Hashtag:
|
|||||||
return self._metadata("profile_pic_url")
|
return self._metadata("profile_pic_url")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def description(self) -> str:
|
def description(self) -> Optional[str]:
|
||||||
return self._metadata("description")
|
return self._metadata("description")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def allow_following(self) -> bool:
|
def allow_following(self) -> bool:
|
||||||
return self._metadata("allow_following")
|
return bool(self._metadata("allow_following"))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_following(self) -> bool:
|
def is_following(self) -> bool:
|
||||||
|
try:
|
||||||
return self._metadata("is_following")
|
return self._metadata("is_following")
|
||||||
|
except KeyError:
|
||||||
@property
|
return bool(self._metadata("following"))
|
||||||
def is_top_media_only(self) -> bool:
|
|
||||||
return self._metadata("is_top_media_only")
|
|
||||||
|
|
||||||
def get_related_tags(self) -> Iterator["Hashtag"]:
|
|
||||||
"""Yields similar hashtags."""
|
|
||||||
yield from (Hashtag(self._context, edge["node"])
|
|
||||||
for edge in self._metadata("edge_hashtag_to_related_tags", "edges"))
|
|
||||||
|
|
||||||
def get_top_posts(self) -> Iterator[Post]:
|
def get_top_posts(self) -> Iterator[Post]:
|
||||||
"""Yields the top posts of the hashtag."""
|
"""Yields the top posts of the hashtag."""
|
||||||
|
try:
|
||||||
yield from (Post(self._context, edge["node"])
|
yield from (Post(self._context, edge["node"])
|
||||||
for edge in self._metadata("edge_hashtag_to_top_posts", "edges"))
|
for edge in self._metadata("edge_hashtag_to_top_posts", "edges"))
|
||||||
|
except KeyError:
|
||||||
|
yield from SectionIterator(
|
||||||
|
self._context,
|
||||||
|
lambda d: d["data"]["top"],
|
||||||
|
lambda m: Post.from_iphone_struct(self._context, m),
|
||||||
|
f"explore/tags/{self.name}/",
|
||||||
|
self._metadata("top"),
|
||||||
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def mediacount(self) -> int:
|
def mediacount(self) -> int:
|
||||||
@ -1468,10 +1528,14 @@ class Hashtag:
|
|||||||
The number of posts with a certain hashtag may differ from the number of posts that can actually be accessed, as
|
The number of posts with a certain hashtag may differ from the number of posts that can actually be accessed, as
|
||||||
the hashtag count might include private posts
|
the hashtag count might include private posts
|
||||||
"""
|
"""
|
||||||
|
try:
|
||||||
return self._metadata("edge_hashtag_to_media", "count")
|
return self._metadata("edge_hashtag_to_media", "count")
|
||||||
|
except KeyError:
|
||||||
|
return self._metadata("media_count")
|
||||||
|
|
||||||
def get_posts(self) -> Iterator[Post]:
|
def get_posts(self) -> Iterator[Post]:
|
||||||
"""Yields the posts associated with this hashtag."""
|
"""Yields the recent posts associated with this hashtag."""
|
||||||
|
try:
|
||||||
self._metadata("edge_hashtag_to_media", "edges")
|
self._metadata("edge_hashtag_to_media", "edges")
|
||||||
self._metadata("edge_hashtag_to_media", "page_info")
|
self._metadata("edge_hashtag_to_media", "page_info")
|
||||||
conn = self._metadata("edge_hashtag_to_media")
|
conn = self._metadata("edge_hashtag_to_media")
|
||||||
@ -1480,10 +1544,18 @@ class Hashtag:
|
|||||||
data = self._query({'__a': 1, 'max_id': conn["page_info"]["end_cursor"]})
|
data = self._query({'__a': 1, 'max_id': conn["page_info"]["end_cursor"]})
|
||||||
conn = data["edge_hashtag_to_media"]
|
conn = data["edge_hashtag_to_media"]
|
||||||
yield from (Post(self._context, edge["node"]) for edge in conn["edges"])
|
yield from (Post(self._context, edge["node"]) for edge in conn["edges"])
|
||||||
|
except KeyError:
|
||||||
|
yield from SectionIterator(
|
||||||
|
self._context,
|
||||||
|
lambda d: d["data"]["recent"],
|
||||||
|
lambda m: Post.from_iphone_struct(self._context, m),
|
||||||
|
f"explore/tags/{self.name}/",
|
||||||
|
self._metadata("recent"),
|
||||||
|
)
|
||||||
|
|
||||||
def get_all_posts(self) -> Iterator[Post]:
|
def get_all_posts(self) -> Iterator[Post]:
|
||||||
"""Yields all posts, i.e. all most recent posts and the top posts, in almost-chronological order."""
|
"""Yields all posts, i.e. all most recent posts and the top posts, in almost-chronological order."""
|
||||||
sorted_top_posts = iter(sorted(self.get_top_posts(), key=lambda p: p.date_utc, reverse=True))
|
sorted_top_posts = iter(sorted(islice(self.get_top_posts(), 9), key=lambda p: p.date_utc, reverse=True))
|
||||||
other_posts = self.get_posts()
|
other_posts = self.get_posts()
|
||||||
next_top = next(sorted_top_posts, None)
|
next_top = next(sorted_top_posts, None)
|
||||||
next_other = next(other_posts, None)
|
next_other = next(other_posts, None)
|
||||||
@ -1510,6 +1582,20 @@ class Hashtag:
|
|||||||
yield next_other
|
yield next_other
|
||||||
next_other = next(other_posts, None)
|
next_other = next(other_posts, None)
|
||||||
|
|
||||||
|
def get_posts_resumable(self) -> NodeIterator[Post]:
    """Get the recent posts of the hashtag in a resumable fashion.

    Unlike :meth:`get_posts`, the result is a :class:`NodeIterator` built over the hashtag's
    ``edge_hashtag_to_media`` connection.

    :rtype: NodeIterator[Post]

    .. versionadded:: 4.9"""
    tag = self.name
    context = self._context
    return NodeIterator(
        context,
        "9b498c08113f1e09617a1703c22b2f32",  # presumably the query hash of the hashtag media query
        lambda d: d['data']['hashtag']['edge_hashtag_to_media'],
        lambda n: Post(context, n),
        {'tag_name': tag},
        f"https://www.instagram.com/explore/tags/{tag}/",
    )
|
||||||
|
|
||||||
|
|
||||||
class TopSearchResults:
|
class TopSearchResults:
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user