Download comments

Close #5
This commit is contained in:
André Koch-Kramer 2017-07-20 22:30:12 +02:00
parent ee8e159d56
commit 169ce1a300
3 changed files with 94 additions and 16 deletions

View File

@ -9,4 +9,4 @@ python:
install: install:
- pip install pylint requests - pip install pylint requests
script: script:
- python3 -m pylint -r n -d bad-whitespace,missing-docstring,too-many-arguments,locally-disabled,line-too-long,too-many-public-methods instaloader - python3 -m pylint -r n -d bad-whitespace,missing-docstring,too-many-arguments,locally-disabled,line-too-long,too-many-public-methods,too-many-lines instaloader

View File

@ -118,6 +118,9 @@ renames the folder likewise.
link. This requires an additional request to the link. This requires an additional request to the
Instagram server for each picture, which is why it is Instagram server for each picture, which is why it is
disabled by default. disabled by default.
--comments Download and update comments for each post. This
requires an additional request to the Instagram server
for each post, which is why it is disabled by default.
When to Stop Downloading When to Stop Downloading
^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -249,7 +249,7 @@ class Instaloader:
if page_info['has_next_page']: if page_info['has_next_page']:
resp = tmpsession.get('https://www.instagram.com/graphql/query/', resp = tmpsession.get('https://www.instagram.com/graphql/query/',
params={'query_id': 17851374694183129, params={'query_id': 17851374694183129,
'variables': '{"id":"' + str(profile_id) + '","first":500},"after":"' + 'variables': '{"id":"' + str(profile_id) + '","first":500,"after":"' +
page_info['end_cursor'] + '"}'}) page_info['end_cursor'] + '"}'})
data = resp.json() data = resp.json()
else: else:
@ -290,7 +290,7 @@ class Instaloader:
if page_info['has_next_page']: if page_info['has_next_page']:
resp = tmpsession.get('https://www.instagram.com/graphql/query/', resp = tmpsession.get('https://www.instagram.com/graphql/query/',
params={'query_id': 17874545323001329, params={'query_id': 17874545323001329,
'variables': '{"id":"' + str(profile_id) + '","first":500},"after":"' + 'variables': '{"id":"' + str(profile_id) + '","first":500,"after":"' +
page_info['end_cursor'] + '"}'}) page_info['end_cursor'] + '"}'})
data = resp.json() data = resp.json()
else: else:
@ -299,6 +299,38 @@ class Instaloader:
raise ConnectionException("ConnectionError({0}): unable to gather followees.".format(resp.status_code)) raise ConnectionException("ConnectionError({0}): unable to gather followees.".format(resp.status_code))
return followees return followees
def get_comments(self, shortcode: str) -> List[Dict[str, Any]]:
    """Retrieve all comments of one post via Instagram's GraphQL endpoint.

    Comments are requested in pages of up to 500; the ``end_cursor`` of each
    page is passed as ``after`` to request the next one until the server
    reports ``has_next_page`` as false.

    :param shortcode: Shortcode of the post, as used in ``/p/<shortcode>/`` URLs
    :return: List of the ``node`` dictionaries of all comment edges, in the order received
    :raises ConnectionException: If any of the requests is answered with a status other than 200
    """
    tmpsession = copy_session(self.session)
    header = self.default_http_header(empty_session_only=True)
    del header['Connection']
    del header['Content-Length']
    header['authority'] = 'www.instagram.com'
    header['scheme'] = 'https'
    header['accept'] = '*/*'
    header['referer'] = 'https://www.instagram.com/p/' + shortcode + '/'
    tmpsession.headers = header
    # Build the query variables as a dict and let json.dumps do the quoting, so
    # that special characters in shortcode or cursor cannot break the JSON.
    # Compact separators keep the serialized form free of extra whitespace.
    query_variables = {'shortcode': shortcode, 'first': 500}
    comments = []
    while True:
        resp = tmpsession.get('https://www.instagram.com/graphql/query/',
                              params={'query_id': 17852405266163336,
                                      'variables': json.dumps(query_variables, separators=(',', ':'))})
        # Check every page, not only the first one: a failed follow-up request
        # must surface as ConnectionException rather than as a JSON/key error.
        if resp.status_code != 200:
            raise ConnectionException("ConnectionError({0}): unable to gather comments.".format(resp.status_code))
        edge_media_to_comment = resp.json()['data']['shortcode_media']['edge_media_to_comment']
        comments.extend(edge['node'] for edge in edge_media_to_comment['edges'])
        page_info = edge_media_to_comment['page_info']
        if not page_info['has_next_page']:
            return comments
        query_variables['after'] = page_info['end_cursor']
def download_pic(self, name: str, url: str, date_epoch: float, outputlabel: Optional[str] = None, def download_pic(self, name: str, url: str, date_epoch: float, outputlabel: Optional[str] = None,
filename_suffix: Optional[str] = None) -> bool: filename_suffix: Optional[str] = None) -> bool:
"""Downloads and saves picture with given url under given directory with given timestamp. """Downloads and saves picture with given url under given directory with given timestamp.
@ -330,6 +362,32 @@ class Instaloader:
else: else:
raise ConnectionException("File \'" + url + "\' could not be downloaded.") raise ConnectionException("File \'" + url + "\' could not be downloaded.")
def update_comments(self, name: str, shortcode: str, date_epoch: float) -> None:
    """Merge the current comments of a post into its ``*_comments.json`` file.

    Comments already stored in the file are loaded, the comments returned by
    :meth:`get_comments` are appended, and the combined list is written back
    sorted by ``created_at`` (newest first, ties ordered by ascending ``id``)
    with adjacent entries of equal ``id`` collapsed to the first one. Nothing
    is written or logged if there are no comments at all.

    :param name: Name used to build the target path; it is lowercased
    :param shortcode: Shortcode of the post whose comments are fetched
    :param date_epoch: Timestamp of the post, used to build the filename
    """
    separator = '/' if self.profile_subdirs else '__'
    filename = name.lower() + separator + _epoch_to_string(date_epoch) + '_comments.json'
    try:
        # Context manager so the handle is closed even if parsing fails.
        with open(filename) as file:
            comments = json.load(file)
    except FileNotFoundError:
        comments = []
    comments.extend(self.get_comments(shortcode))
    if not comments:
        return
    # Both sorts are stable, so after the second one entries sharing a
    # created_at value remain ordered by id and duplicates end up adjacent.
    comments_list = sorted(sorted(comments, key=lambda t: t['id']),
                           key=lambda t: t['created_at'], reverse=True)
    unique_comments_list = comments_list[:1]
    for previous, current in zip(comments_list, comments_list[1:]):
        if previous['id'] != current['id']:
            unique_comments_list.append(current)
    # Serialize before opening the file for writing: opening with 'w' truncates
    # it, so an error during sorting/serialization must not cost the old data.
    serialized = json.dumps(unique_comments_list, indent=4)
    with open(filename, 'w') as file:
        file.write(serialized)
    self._log('comments', end=' ', flush=True)
def save_caption(self, name: str, date_epoch: float, caption: str) -> None: def save_caption(self, name: str, date_epoch: float, caption: str) -> None:
"""Updates picture caption""" """Updates picture caption"""
# pylint:disable=too-many-branches # pylint:disable=too-many-branches
@ -543,7 +601,7 @@ class Instaloader:
return location_json["entry_data"]["LocationsPage"][0]["location"] return location_json["entry_data"]["LocationsPage"][0]["location"]
def download_node(self, node: Dict[str, Any], name: str, def download_node(self, node: Dict[str, Any], name: str,
download_videos: bool = True, geotags: bool = False) -> bool: download_videos: bool = True, geotags: bool = False, download_comments: bool = False) -> bool:
""" """
Download everything associated with one instagram node, i.e. picture, caption and video. Download everything associated with one instagram node, i.e. picture, caption and video.
@ -551,6 +609,7 @@ class Instaloader:
:param name: Name of profile to which this node belongs :param name: Name of profile to which this node belongs
:param download_videos: True, if videos should be downloaded :param download_videos: True, if videos should be downloaded
:param geotags: Download geotags :param geotags: Download geotags
:param download_comments: Update comments
:return: True if something was downloaded, False otherwise, i.e. file was already there :return: True if something was downloaded, False otherwise, i.e. file was already there
""" """
# pylint:disable=too-many-branches,too-many-locals # pylint:disable=too-many-branches,too-many-locals
@ -601,12 +660,15 @@ class Instaloader:
location = self.get_location(node_code) location = self.get_location(node_code)
if location: if location:
self.save_location(name, location, date) self.save_location(name, location, date)
if download_comments:
self.update_comments(name, node_code, date)
self._log() self._log()
return downloaded return downloaded
def download_feed_pics(self, max_count: int = None, fast_update: bool = False, def download_feed_pics(self, max_count: int = None, fast_update: bool = False,
filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None, filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None,
download_videos: bool = True, geotags: bool = False) -> None: download_videos: bool = True, geotags: bool = False,
download_comments: bool = False) -> None:
""" """
Download pictures from the user's feed. Download pictures from the user's feed.
@ -624,6 +686,7 @@ class Instaloader:
:param filter_func: function(node), which returns True if given picture should not be downloaded :param filter_func: function(node), which returns True if given picture should not be downloaded
:param download_videos: True, if videos should be downloaded :param download_videos: True, if videos should be downloaded
:param geotags: Download geotags :param geotags: Download geotags
:param download_comments: Update comments
""" """
# pylint:disable=too-many-locals # pylint:disable=too-many-locals
data = self.get_feed_json() data = self.get_feed_json()
@ -646,7 +709,8 @@ class Instaloader:
self._log("[%3i] %s " % (count, name), end="", flush=True) self._log("[%3i] %s " % (count, name), end="", flush=True)
count += 1 count += 1
downloaded = self.download_node(node, name, downloaded = self.download_node(node, name,
download_videos=download_videos, geotags=geotags) download_videos=download_videos, geotags=geotags,
download_comments=download_comments)
if fast_update and not downloaded: if fast_update and not downloaded:
return return
if not feed["page_info"]["has_next_page"]: if not feed["page_info"]["has_next_page"]:
@ -662,6 +726,7 @@ class Instaloader:
max_count: Optional[int] = None, max_count: Optional[int] = None,
filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None, filter_func: Optional[Callable[[Dict[str, Dict[str, Any]]], bool]] = None,
fast_update: bool = False, download_videos: bool = True, geotags: bool = False, fast_update: bool = False, download_videos: bool = True, geotags: bool = False,
download_comments: bool = False,
lookup_username: bool = False) -> None: lookup_username: bool = False) -> None:
"""Download pictures of one hashtag. """Download pictures of one hashtag.
@ -675,6 +740,7 @@ class Instaloader:
:param fast_update: If true, abort when first already-downloaded picture is encountered :param fast_update: If true, abort when first already-downloaded picture is encountered
:param download_videos: True, if videos should be downloaded :param download_videos: True, if videos should be downloaded
:param geotags: Download geotags :param geotags: Download geotags
:param download_comments: Update comments
:param lookup_username: Lookup username to encode it in the downloaded file's path, rather than the hashtag :param lookup_username: Lookup username to encode it in the downloaded file's path, rather than the hashtag
""" """
data = self.get_hashtag_json(hashtag) data = self.get_hashtag_json(hashtag)
@ -694,7 +760,8 @@ class Instaloader:
continue continue
count += 1 count += 1
downloaded = self.download_node(node, pathname, downloaded = self.download_node(node, pathname,
download_videos=download_videos, geotags=geotags) download_videos=download_videos, geotags=geotags,
download_comments=download_comments)
if fast_update and not downloaded: if fast_update and not downloaded:
return return
if data['entry_data']['TagPage'][0]['tag']['media']['page_info']['has_next_page']: if data['entry_data']['TagPage'][0]['tag']['media']['page_info']['has_next_page']:
@ -742,7 +809,7 @@ class Instaloader:
def download(self, name: str, def download(self, name: str,
profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False, profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
fast_update: bool = False) -> None: download_comments: bool = False, fast_update: bool = False) -> None:
"""Download one profile""" """Download one profile"""
# pylint:disable=too-many-branches,too-many-locals # pylint:disable=too-many-branches,too-many-locals
# Get profile main page json # Get profile main page json
@ -785,7 +852,8 @@ class Instaloader:
self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True) self._log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
count += 1 count += 1
downloaded = self.download_node(node, name, downloaded = self.download_node(node, name,
download_videos=download_videos, geotags=geotags) download_videos=download_videos, geotags=geotags,
download_comments=download_comments)
if fast_update and not downloaded: if fast_update and not downloaded:
return return
data = self.get_json(name, max_id=get_last_id(data)) data = self.get_json(name, max_id=get_last_id(data))
@ -808,6 +876,7 @@ class Instaloader:
def download_profiles(self, profilelist: List[str], username: Optional[str] = None, password: Optional[str] = None, def download_profiles(self, profilelist: List[str], username: Optional[str] = None, password: Optional[str] = None,
sessionfile: Optional[str] = None, max_count: Optional[int] = None, sessionfile: Optional[str] = None, max_count: Optional[int] = None,
profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False, profile_pic_only: bool = False, download_videos: bool = True, geotags: bool = False,
download_comments: bool = False,
fast_update: bool = False, hashtag_lookup_username: bool = False) -> None: fast_update: bool = False, hashtag_lookup_username: bool = False) -> None:
"""Download set of profiles and handle sessions""" """Download set of profiles and handle sessions"""
# pylint:disable=too-many-branches,too-many-locals,too-many-statements # pylint:disable=too-many-branches,too-many-locals,too-many-statements
@ -835,7 +904,7 @@ class Instaloader:
self._log("Retrieving pictures with hashtag {0}".format(pentry)) self._log("Retrieving pictures with hashtag {0}".format(pentry))
self.download_hashtag(hashtag=pentry[1:], max_count=max_count, fast_update=fast_update, self.download_hashtag(hashtag=pentry[1:], max_count=max_count, fast_update=fast_update,
download_videos=download_videos, geotags=geotags, download_videos=download_videos, geotags=geotags,
lookup_username=hashtag_lookup_username) download_comments=download_comments, lookup_username=hashtag_lookup_username)
elif pentry[0] == '@': elif pentry[0] == '@':
if username is not None: if username is not None:
self._log("Retrieving followees of %s..." % pentry[1:]) self._log("Retrieving followees of %s..." % pentry[1:])
@ -847,7 +916,8 @@ class Instaloader:
if username is not None: if username is not None:
self._log("Retrieving pictures from your feed...") self._log("Retrieving pictures from your feed...")
self.download_feed_pics(fast_update=fast_update, max_count=max_count, self.download_feed_pics(fast_update=fast_update, max_count=max_count,
download_videos=download_videos, geotags=geotags) download_videos=download_videos, geotags=geotags,
download_comments=download_comments)
else: else:
print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr) print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
elif pentry == ":feed-liked": elif pentry == ":feed-liked":
@ -858,7 +928,8 @@ class Instaloader:
not node["likes"]["viewer_has_liked"] not node["likes"]["viewer_has_liked"]
if "likes" in node if "likes" in node
else not node["viewer_has_liked"], else not node["viewer_has_liked"],
download_videos=download_videos, geotags=geotags) download_videos=download_videos, geotags=geotags,
download_comments=download_comments)
else: else:
print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr) print("--login=USERNAME required to download {}.".format(pentry), file=sys.stderr)
else: else:
@ -870,7 +941,7 @@ class Instaloader:
try: try:
try: try:
self.download(target, profile_pic_only, download_videos, self.download(target, profile_pic_only, download_videos,
geotags, fast_update) geotags, download_comments, fast_update)
except ProfileNotExistsException as err: except ProfileNotExistsException as err:
if username is not None: if username is not None:
self._log(err) self._log(err)
@ -878,7 +949,7 @@ class Instaloader:
anonymous_loader = Instaloader(self.sleep, self.quiet, self.shorter_output, anonymous_loader = Instaloader(self.sleep, self.quiet, self.shorter_output,
self.profile_subdirs, self.user_agent) self.profile_subdirs, self.user_agent)
anonymous_loader.download(target, profile_pic_only, download_videos, anonymous_loader.download(target, profile_pic_only, download_videos,
geotags, fast_update) geotags, download_comments, fast_update)
else: else:
raise err raise err
except NonfatalException as err: except NonfatalException as err:
@ -920,6 +991,10 @@ def main():
'text file with the location\'s name and a Google Maps link. ' 'text file with the location\'s name and a Google Maps link. '
'This requires an additional request to the Instagram ' 'This requires an additional request to the Instagram '
'server for each picture, which is why it is disabled by default.') 'server for each picture, which is why it is disabled by default.')
g_what.add_argument('-C', '--comments', action='store_true',
help='Download and update comments for each post. '
'This requires an additional request to the Instagram '
'server for each post, which is why it is disabled by default.')
g_stop = parser.add_argument_group('When to Stop Downloading', g_stop = parser.add_argument_group('When to Stop Downloading',
'If none of these options are given, Instaloader goes through all pictures ' 'If none of these options are given, Instaloader goes through all pictures '
@ -978,8 +1053,8 @@ def main():
profile_subdirs=not args.no_profile_subdir, user_agent=args.user_agent) profile_subdirs=not args.no_profile_subdir, user_agent=args.user_agent)
loader.download_profiles(args.profile, args.login, args.password, args.sessionfile, loader.download_profiles(args.profile, args.login, args.password, args.sessionfile,
int(args.count) if args.count is not None else None, int(args.count) if args.count is not None else None,
args.profile_pic_only, not args.skip_videos, args.geotags, args.fast_update, args.profile_pic_only, not args.skip_videos, args.geotags, args.download_comments,
args.hashtag_username) args.fast_update, args.hashtag_username)
except InstaloaderException as err: except InstaloaderException as err:
raise SystemExit("Fatal error: %s" % err) raise SystemExit("Fatal error: %s" % err)