Support resuming interrupted comment downloads
Co-Authored-By: André Koch-Kramer <koch-kramer@web.de>
This commit is contained in:
parent
cd13211603
commit
f6731566cd
@ -335,18 +335,47 @@ class Instaloader:
|
||||
combined_answers.extend(y['answers'])
|
||||
unique_comments_list[-1]['answers'] = get_unique_comments(combined_answers)
|
||||
return unique_comments_list
|
||||
|
||||
def get_new_comments(new_comments, start):
    """Yield every item of ``new_comments`` unchanged while reporting progress.

    Items are numbered beginning at ``start + 1``. Whenever an item's number
    is a multiple of 250, that number is written through ``self.context.log``
    (taken from the enclosing scope) before the item is yielded.

    :param new_comments: Iterable of comments to pass through.
    :param start: Number of items counted before this iteration begins.
    """
    position = start + 1
    for comment in new_comments:
        if position % 250 == 0:
            # Progress marker; the trailing ellipsis keeps output on one line.
            self.context.log('{}'.format(position), end='…', flush=True)
        yield comment
        position += 1
|
||||
|
||||
def save_comments(extended_comments):
    """Write the given comments to ``filename`` as indented JSON.

    The comments are first passed through ``get_unique_comments`` with
    ``combine_answers=True``. Any resulting entry whose id also occurs as
    the id of an answer of some entry is left out of the written list.
    ``filename`` is taken from the enclosing scope and is overwritten.

    :param extended_comments: List of comment dicts, each carrying an
       ``'id'`` key and optionally an ``'answers'`` list of dicts with ``'id'``.
    """
    merged = get_unique_comments(extended_comments, combine_answers=True)
    # Ids of all answers, used to drop entries that duplicate an answer.
    ids_of_answers = {
        int(reply['id'])
        for parent in merged
        for reply in parent.get('answers', [])
    }
    with open(filename, 'w') as file:
        top_level = [entry for entry in merged if int(entry['id']) not in ids_of_answers]
        file.write(json.dumps(top_level, indent=4))
|
||||
|
||||
base_filename = filename
|
||||
filename += '_comments.json'
|
||||
try:
|
||||
with open(filename) as fp:
|
||||
comments = json.load(fp)
|
||||
except (FileNotFoundError, json.decoder.JSONDecodeError):
|
||||
comments = list()
|
||||
comments.extend(_postcomment_asdict(comment) for comment in post.get_comments())
|
||||
|
||||
comments_iterator = post.get_comments()
|
||||
try:
|
||||
with resumable_iteration(
|
||||
context=self.context,
|
||||
iterator=comments_iterator,
|
||||
load=load_structure_from_file,
|
||||
save=save_structure_to_file,
|
||||
format_path=lambda magic: "{}_{}_{}.json.xz".format(base_filename, self.resume_prefix, magic),
|
||||
check_bbd=self.check_resume_bbd,
|
||||
enabled=self.resume_prefix is not None
|
||||
) as (_is_resuming, start_index):
|
||||
comments.extend(_postcomment_asdict(comment)
|
||||
for comment in get_new_comments(comments_iterator, start_index))
|
||||
except (KeyboardInterrupt, AbortDownloadException):
|
||||
if comments:
|
||||
save_comments(comments)
|
||||
raise
|
||||
if comments:
|
||||
comments = get_unique_comments(comments, combine_answers=True)
|
||||
answer_ids = set(int(answer['id']) for comment in comments for answer in comment.get('answers', []))
|
||||
with open(filename, 'w') as file:
|
||||
file.write(json.dumps(list(filter(lambda t: int(t['id']) not in answer_ids, comments)), indent=4))
|
||||
save_comments(comments)
|
||||
self.context.log('comments', end=' ', flush=True)
|
||||
|
||||
def save_caption(self, filename: str, mtime: datetime, caption: str) -> None:
|
||||
|
@ -5,7 +5,7 @@ import os
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime, timedelta
|
||||
from lzma import LZMAError
|
||||
from typing import Any, Callable, Dict, Iterator, NamedTuple, Optional, Tuple, TypeVar
|
||||
from typing import Any, Callable, Dict, Iterable, Iterator, NamedTuple, Optional, Tuple, TypeVar
|
||||
|
||||
from .exceptions import AbortDownloadException, InvalidArgumentException, QueryReturnedBadRequestException
|
||||
from .instaloadercontext import InstaloaderContext
|
||||
@ -204,7 +204,7 @@ class NodeIterator(Iterator[T]):
|
||||
|
||||
@contextmanager
|
||||
def resumable_iteration(context: InstaloaderContext,
|
||||
iterator: Iterator,
|
||||
iterator: Iterable,
|
||||
load: Callable[[InstaloaderContext, str], Any],
|
||||
save: Callable[[FrozenNodeIterator, str], None],
|
||||
format_path: Callable[[str], str],
|
||||
|
@ -4,7 +4,7 @@ import re
|
||||
from base64 import b64decode, b64encode
|
||||
from collections import namedtuple
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Iterator, List, Optional, Union
|
||||
from typing import Any, Dict, Iterable, Iterator, List, Optional, Union
|
||||
|
||||
from . import __version__
|
||||
from .exceptions import *
|
||||
@ -426,12 +426,15 @@ class Post:
|
||||
except KeyError:
|
||||
return self._field('edge_media_to_comment', 'count')
|
||||
|
||||
def get_comments(self) -> Iterator[PostComment]:
|
||||
def get_comments(self) -> Iterable[PostComment]:
|
||||
r"""Iterate over all comments of the post.
|
||||
|
||||
Each comment is represented by a PostComment namedtuple with fields text (string), created_at (datetime),
|
||||
id (int), owner (:class:`Profile`) and answers (:class:`~typing.Iterator`\ [:class:`PostCommentAnswer`])
|
||||
if available.
|
||||
|
||||
.. versionchanged:: 4.7
|
||||
Change return type to ``Iterable``.
|
||||
"""
|
||||
def _postcommentanswer(node):
|
||||
return PostCommentAnswer(id=int(node['id']),
|
||||
@ -466,16 +469,15 @@ class Post:
|
||||
answers=_postcommentanswers(node))
|
||||
if self.comments == 0:
|
||||
# Avoid doing additional requests if there are no comments
|
||||
return
|
||||
return []
|
||||
|
||||
comment_edges = self._field('edge_media_to_comment', 'edges')
|
||||
answers_count = sum([edge['node'].get('edge_threaded_comments', {}).get('count', 0) for edge in comment_edges])
|
||||
|
||||
if self.comments == len(comment_edges) + answers_count:
|
||||
# If the Post's metadata already contains all parent comments, don't do GraphQL requests to obtain them
|
||||
yield from (_postcomment(comment['node']) for comment in comment_edges)
|
||||
return
|
||||
yield from NodeIterator(
|
||||
return [_postcomment(comment['node']) for comment in comment_edges]
|
||||
return NodeIterator(
|
||||
self._context,
|
||||
'97b41c52301f77ce508f55e66d17620e',
|
||||
lambda d: d['data']['shortcode_media']['edge_media_to_parent_comment'],
|
||||
|
Loading…
Reference in New Issue
Block a user