Merge branch 'master' into upcoming/v4.5
This commit is contained in:
commit
c817d1901a
4
docs/_static/instaloader.css
vendored
4
docs/_static/instaloader.css
vendored
@ -17,7 +17,9 @@ code {
|
||||
.highlight pre {
|
||||
padding: 0.7em;
|
||||
color: #fff; }
|
||||
.highlight .c1, .highlight .k, .highlight .kn, .highlight .ow {
|
||||
.highlight .c1 {
|
||||
color: #666; }
|
||||
.highlight .k, .highlight .kn, .highlight .ow {
|
||||
color: #008d06; }
|
||||
.highlight .nb, .highlight .ne, .highlight .nf, .highlight .vm {
|
||||
color: #f48400; }
|
||||
|
6
docs/_static/instaloader.scss
vendored
6
docs/_static/instaloader.scss
vendored
@ -36,7 +36,11 @@ code {
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.c1, .k, .kn, .ow {
|
||||
.c1 {
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.k, .kn, .ow {
|
||||
color: $color_instaloader_main
|
||||
}
|
||||
|
||||
|
@ -28,8 +28,9 @@ Download Posts in a Specific Period
|
||||
-----------------------------------
|
||||
|
||||
To only download Instagram pictures (and metadata) that are within a specific
|
||||
period, you can play around with :func:`~itertools.dropwhile` and
|
||||
:func:`~itertools.takewhile` from :mod:`itertools` like in this snippet.
|
||||
period, you can simply use :func:`~itertools.dropwhile` and
|
||||
:func:`~itertools.takewhile` from :mod:`itertools` on a generator that returns
|
||||
Posts in **exact chronological order**, such as :meth:`Profile.get_posts`.
|
||||
|
||||
.. literalinclude:: codesnippets/121_since_until.py
|
||||
|
||||
@ -37,6 +38,14 @@ See also :class:`Post`, :meth:`Instaloader.download_post`.
|
||||
|
||||
Discussed in :issue:`121`.
|
||||
|
||||
The code example with :func:`~itertools.dropwhile` and
|
||||
:func:`~itertools.takewhile` makes the assumption that the post iterator returns
|
||||
posts in exact chronological order. As discussed in :issue:`666`, the following
|
||||
approach fits for an **almost chronological order**, where up to *k* older posts
|
||||
are inserted into an otherwise chronological order, such as a Hashtag feed.
|
||||
|
||||
.. literalinclude:: codesnippets/666_historical_hashtag_data.py
|
||||
|
||||
Likes of a Profile / Ghost Followers
|
||||
------------------------------------
|
||||
|
||||
|
@ -5,13 +5,11 @@ import instaloader
|
||||
|
||||
L = instaloader.Instaloader()
|
||||
|
||||
posts = instaloader.Hashtag.from_name(L.context, 'urbanphotography').get_posts()
|
||||
# or
|
||||
# posts = instaloader.Profile.from_username(L.context, PROFILE).get_posts()
|
||||
posts = instaloader.Profile.from_username(L.context, "instagram").get_posts()
|
||||
|
||||
SINCE = datetime(2015, 5, 1)
|
||||
UNTIL = datetime(2015, 3, 1)
|
||||
|
||||
for post in takewhile(lambda p: p.date > UNTIL, dropwhile(lambda p: p.date > SINCE, posts)):
|
||||
print(post.date)
|
||||
L.download_post(post, '#urbanphotography')
|
||||
L.download_post(post, "instagram")
|
||||
|
30
docs/codesnippets/666_historical_hashtag_data.py
Normal file
30
docs/codesnippets/666_historical_hashtag_data.py
Normal file
@ -0,0 +1,30 @@
|
||||
from datetime import datetime
|
||||
import instaloader
|
||||
|
||||
L = instaloader.Instaloader()
|
||||
|
||||
posts = instaloader.Hashtag.from_name(L.context, "urbanphotography").get_posts()
|
||||
|
||||
SINCE = datetime(2020, 5, 10) # further from today, exclusive (posts with date <= SINCE are skipped)
|
||||
UNTIL = datetime(2020, 5, 11) # closer to today, inclusive (only posts with date > UNTIL are skipped)
|
||||
|
||||
k = 0 # initiate k
|
||||
k_list = [] # only needed when tuning k (see the commented-out lines at the end)
|
||||
|
||||
for post in posts:
|
||||
postdate = post.date
|
||||
|
||||
if postdate > UNTIL:
|
||||
continue
|
||||
elif postdate <= SINCE:
|
||||
k += 1
|
||||
if k == 50:
|
||||
break
|
||||
else:
|
||||
continue
|
||||
else:
|
||||
L.download_post(post, "#urbanphotography")
|
||||
k = 0 # set k to 0
|
||||
# if you want to tune k, uncomment below to get your k max
|
||||
#k_list.append(k)
|
||||
#max(k_list)
|
@ -1,7 +1,7 @@
|
||||
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
|
||||
|
||||
|
||||
__version__ = '4.4.4'
|
||||
__version__ = '4.4.5'
|
||||
|
||||
|
||||
try:
|
||||
|
@ -69,7 +69,6 @@ class Post:
|
||||
self._node = node
|
||||
self._owner_profile = owner_profile
|
||||
self._full_metadata_dict = None # type: Optional[Dict[str, Any]]
|
||||
self._rhx_gis_str = None # type: Optional[str]
|
||||
self._location = None # type: Optional[PostLocation]
|
||||
self._iphone_struct_ = None
|
||||
if 'iphone_struct' in node:
|
||||
@ -142,9 +141,11 @@ class Post:
|
||||
|
||||
def _obtain_metadata(self):
|
||||
if not self._full_metadata_dict:
|
||||
pic_json = self._context.get_json("p/{0}/".format(self.shortcode), params={})
|
||||
self._full_metadata_dict = pic_json['entry_data']['PostPage'][0]['graphql']['shortcode_media']
|
||||
self._rhx_gis_str = pic_json.get('rhx_gis')
|
||||
pic_json = self._context.graphql_query(
|
||||
'2b0673e0dc4580674a88d426fe00ea90',
|
||||
{'shortcode': self.shortcode}
|
||||
)
|
||||
self._full_metadata_dict = pic_json['data']['shortcode_media']
|
||||
if self._full_metadata_dict is None:
|
||||
# issue #449
|
||||
self._context.error("Fetching Post metadata failed (issue #449). "
|
||||
@ -161,11 +162,6 @@ class Post:
|
||||
assert self._full_metadata_dict is not None
|
||||
return self._full_metadata_dict
|
||||
|
||||
@property
|
||||
def _rhx_gis(self) -> Optional[str]:
|
||||
self._obtain_metadata()
|
||||
return self._rhx_gis_str
|
||||
|
||||
@property
|
||||
def _iphone_struct(self) -> Dict[str, Any]:
|
||||
if not self._context.is_logged_in:
|
||||
@ -392,7 +388,7 @@ class Post:
|
||||
created_at_utc=datetime.utcfromtimestamp(node['created_at']),
|
||||
text=node['text'],
|
||||
owner=Profile(self._context, node['owner']),
|
||||
likes_count=node['edge_liked_by']['count'])
|
||||
likes_count=node.get('edge_liked_by', {}).get('count', 0))
|
||||
|
||||
def _postcommentanswers(node):
|
||||
if 'edge_threaded_comments' not in node:
|
||||
@ -418,14 +414,9 @@ class Post:
|
||||
if self.comments == 0:
|
||||
# Avoid doing additional requests if there are no comments
|
||||
return
|
||||
try:
|
||||
comment_edges = self._field('edge_media_to_parent_comment', 'edges')
|
||||
answers_count = sum([edge['node']['edge_threaded_comments']['count'] for edge in comment_edges])
|
||||
threaded_comments_available = True
|
||||
except KeyError:
|
||||
comment_edges = self._field('edge_media_to_comment', 'edges')
|
||||
answers_count = 0
|
||||
threaded_comments_available = False
|
||||
|
||||
comment_edges = self._field('edge_media_to_comment', 'edges')
|
||||
answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])
|
||||
|
||||
if self.comments == len(comment_edges) + answers_count:
|
||||
# If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
|
||||
@ -433,14 +424,10 @@ class Post:
|
||||
return
|
||||
yield from (_postcomment(node) for node in
|
||||
self._context.graphql_node_list(
|
||||
"97b41c52301f77ce508f55e66d17620e" if threaded_comments_available
|
||||
else "f0986789a5c5d17c2400faebf16efd0d",
|
||||
"97b41c52301f77ce508f55e66d17620e",
|
||||
{'shortcode': self.shortcode},
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
lambda d:
|
||||
d['data']['shortcode_media'][
|
||||
'edge_media_to_parent_comment' if threaded_comments_available else 'edge_media_to_comment'],
|
||||
self._rhx_gis))
|
||||
lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment']))
|
||||
|
||||
def get_likes(self) -> Iterator['Profile']:
|
||||
"""Iterate over all likes of the post. A :class:`Profile` instance of each liker is yielded."""
|
||||
@ -455,8 +442,7 @@ class Post:
|
||||
yield from (Profile(self._context, node) for node in
|
||||
self._context.graphql_node_list("1cb6ec562846122743b61e492c85999f", {'shortcode': self.shortcode},
|
||||
'https://www.instagram.com/p/' + self.shortcode + '/',
|
||||
lambda d: d['data']['shortcode_media']['edge_liked_by'],
|
||||
self._rhx_gis))
|
||||
lambda d: d['data']['shortcode_media']['edge_liked_by']))
|
||||
|
||||
@property
|
||||
def is_sponsored(self) -> bool:
|
||||
@ -537,7 +523,6 @@ class Profile:
|
||||
self._has_public_story = None # type: Optional[bool]
|
||||
self._node = node
|
||||
self._has_full_metadata = False
|
||||
self._rhx_gis = None
|
||||
self._iphone_struct_ = None
|
||||
if 'iphone_struct' in node:
|
||||
# if loaded from JSON with load_structure_from_file()
|
||||
@ -599,10 +584,9 @@ class Profile:
|
||||
def _obtain_metadata(self):
|
||||
try:
|
||||
if not self._has_full_metadata:
|
||||
metadata = self._context.get_json('{}/'.format(self.username), params={})
|
||||
metadata = self._context.get_json('{}/feed/'.format(self.username), params={})
|
||||
self._node = metadata['entry_data']['ProfilePage'][0]['graphql']['user']
|
||||
self._has_full_metadata = True
|
||||
self._rhx_gis = metadata.get('rhx_gis')
|
||||
except (QueryReturnedNotFoundException, KeyError) as err:
|
||||
top_search_results = TopSearchResults(self._context, self.username)
|
||||
similar_profiles = [profile.username for profile in top_search_results.get_profiles()]
|
||||
@ -735,8 +719,7 @@ class Profile:
|
||||
'include_reel': False, 'include_suggested_users': False,
|
||||
'include_logged_out_extras': True,
|
||||
'include_highlight_reels': False},
|
||||
'https://www.instagram.com/{}/'.format(self.username),
|
||||
self._rhx_gis)
|
||||
'https://www.instagram.com/{}/'.format(self.username))
|
||||
self._has_public_story = data['data']['user']['has_public_story']
|
||||
assert self._has_public_story is not None
|
||||
return self._has_public_story
|
||||
@ -795,8 +778,7 @@ class Profile:
|
||||
{'id': self.userid},
|
||||
'https://www.instagram.com/{0}/'.format(self.username),
|
||||
lambda d: d['data']['user']['edge_owner_to_timeline_media'],
|
||||
self._rhx_gis,
|
||||
self._metadata('edge_owner_to_timeline_media')))
|
||||
first_data=self._metadata('edge_owner_to_timeline_media')))
|
||||
|
||||
def get_saved_posts(self) -> Iterator[Post]:
|
||||
"""Get Posts that are marked as saved by the user."""
|
||||
@ -810,8 +792,7 @@ class Profile:
|
||||
{'id': self.userid},
|
||||
'https://www.instagram.com/{0}/'.format(self.username),
|
||||
lambda d: d['data']['user']['edge_saved_media'],
|
||||
self._rhx_gis,
|
||||
self._metadata('edge_saved_media')))
|
||||
first_data=self._metadata('edge_saved_media')))
|
||||
|
||||
def get_tagged_posts(self) -> Iterator[Post]:
|
||||
"""Retrieve all posts where a profile is tagged.
|
||||
@ -822,8 +803,7 @@ class Profile:
|
||||
self._context.graphql_node_list("e31a871f7301132ceaab56507a66bbb7",
|
||||
{'id': self.userid},
|
||||
'https://www.instagram.com/{0}/'.format(self.username),
|
||||
lambda d: d['data']['user']['edge_user_to_photos_of_you'],
|
||||
self._rhx_gis))
|
||||
lambda d: d['data']['user']['edge_user_to_photos_of_you']))
|
||||
|
||||
def get_igtv_posts(self) -> Iterator[Post]:
|
||||
"""Retrieve all IGTV posts.
|
||||
@ -835,8 +815,7 @@ class Profile:
|
||||
{'id': self.userid},
|
||||
'https://www.instagram.com/{0}/channel/'.format(self.username),
|
||||
lambda d: d['data']['user']['edge_felix_video_timeline'],
|
||||
self._rhx_gis,
|
||||
self._metadata('edge_felix_video_timeline')))
|
||||
first_data=self._metadata('edge_felix_video_timeline')))
|
||||
|
||||
def get_followers(self) -> Iterator['Profile']:
|
||||
"""
|
||||
@ -850,8 +829,7 @@ class Profile:
|
||||
self._context.graphql_node_list("37479f2b8209594dde7facb0d904896a",
|
||||
{'id': str(self.userid)},
|
||||
'https://www.instagram.com/' + self.username + '/',
|
||||
lambda d: d['data']['user']['edge_followed_by'],
|
||||
self._rhx_gis))
|
||||
lambda d: d['data']['user']['edge_followed_by']))
|
||||
|
||||
def get_followees(self) -> Iterator['Profile']:
|
||||
"""
|
||||
@ -865,8 +843,7 @@ class Profile:
|
||||
self._context.graphql_node_list("58712303d941c6855d4e888c5f0cd22f",
|
||||
{'id': str(self.userid)},
|
||||
'https://www.instagram.com/' + self.username + '/',
|
||||
lambda d: d['data']['user']['edge_follow'],
|
||||
self._rhx_gis))
|
||||
lambda d: d['data']['user']['edge_follow']))
|
||||
|
||||
def get_similar_accounts(self) -> Iterator['Profile']:
|
||||
"""
|
||||
@ -881,8 +858,8 @@ class Profile:
|
||||
yield from (Profile(self._context, edge["node"]) for edge in
|
||||
self._context.graphql_query("ad99dd9d3646cc3c0dda65debcd266a7",
|
||||
{"user_id": str(self.userid), "include_chaining": True},
|
||||
"https://www.instagram.com/{0}/".format(self.username),
|
||||
self._rhx_gis)["data"]["user"]["edge_chaining"]["edges"])
|
||||
"https://www.instagram.com/{0}/"
|
||||
.format(self.username))["data"]["user"]["edge_chaining"]["edges"])
|
||||
|
||||
|
||||
class StoryItem:
|
||||
|
Loading…
Reference in New Issue
Block a user