Allow selecting a range to download from a sidecar (#749)
Co-authored-by: Alexander Graf <17130992+aandergr@users.noreply.github.com>
This commit is contained in:
parent
097bf7fecc
commit
b31f279527
@ -82,6 +82,16 @@ What to Download of each Post
|
|||||||
Template to write in txt file for each StoryItem. See
|
Template to write in txt file for each StoryItem. See
|
||||||
:ref:`metadata-text-files`.
|
:ref:`metadata-text-files`.
|
||||||
|
|
||||||
|
.. option:: --slide
|
||||||
|
|
||||||
|
Download only selected images of a sidecar. You can select single images using their
|
||||||
|
index in the sidecar starting with the leftmost or you can specify a range of images
|
||||||
|
with the following syntax: ``start_index-end_index``. Example:
|
||||||
|
``--slide 1`` will select only the first image, ``--slide last`` only the last one and ``--slide 1-3`` will select only
|
||||||
|
the first three images.
|
||||||
|
|
||||||
|
.. versionadded:: 4.6
|
||||||
|
|
||||||
.. option:: --no-metadata-json
|
.. option:: --no-metadata-json
|
||||||
|
|
||||||
Do not create a JSON file containing the metadata of each post.
|
Do not create a JSON file containing the metadata of each post.
|
||||||
|
@ -267,6 +267,8 @@ def main():
|
|||||||
help="Do not download regular posts.")
|
help="Do not download regular posts.")
|
||||||
g_prof.add_argument('--no-profile-pic', action='store_true',
|
g_prof.add_argument('--no-profile-pic', action='store_true',
|
||||||
help='Do not download profile picture.')
|
help='Do not download profile picture.')
|
||||||
|
g_post.add_argument('--slide', action='store',
|
||||||
|
help='Set what image/interval of a sidecar you want to download.')
|
||||||
g_post.add_argument('--no-pictures', action='store_true',
|
g_post.add_argument('--no-pictures', action='store_true',
|
||||||
help='Do not download post pictures. Cannot be used together with --fast-update. '
|
help='Do not download post pictures. Cannot be used together with --fast-update. '
|
||||||
'Implies --no-video-thumbnails, does not imply --no-videos.')
|
'Implies --no-video-thumbnails, does not imply --no-videos.')
|
||||||
@ -424,7 +426,8 @@ def main():
|
|||||||
max_connection_attempts=args.max_connection_attempts,
|
max_connection_attempts=args.max_connection_attempts,
|
||||||
request_timeout=args.request_timeout,
|
request_timeout=args.request_timeout,
|
||||||
resume_prefix=resume_prefix,
|
resume_prefix=resume_prefix,
|
||||||
check_resume_bbd=not args.use_aged_resume_files)
|
check_resume_bbd=not args.use_aged_resume_files,
|
||||||
|
slide=args.slide)
|
||||||
_main(loader,
|
_main(loader,
|
||||||
args.profile,
|
args.profile,
|
||||||
username=args.login.lower() if args.login is not None else None,
|
username=args.login.lower() if args.login is not None else None,
|
||||||
|
@ -160,6 +160,7 @@ class Instaloader:
|
|||||||
:param rate_controller: Generator for a :class:`RateController` to override rate controlling behavior
|
:param rate_controller: Generator for a :class:`RateController` to override rate controlling behavior
|
||||||
:param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`.
|
:param resume_prefix: :option:`--resume-prefix`, or None for :option:`--no-resume`.
|
||||||
:param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired.
|
:param check_resume_bbd: Whether to check the date of expiry of resume files and reject them if expired.
|
||||||
|
:param slide: :option:`--slide`
|
||||||
|
|
||||||
.. attribute:: context
|
.. attribute:: context
|
||||||
|
|
||||||
@ -185,7 +186,8 @@ class Instaloader:
|
|||||||
request_timeout: float = 300.0,
|
request_timeout: float = 300.0,
|
||||||
rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None,
|
rate_controller: Optional[Callable[[InstaloaderContext], RateController]] = None,
|
||||||
resume_prefix: Optional[str] = "iterator",
|
resume_prefix: Optional[str] = "iterator",
|
||||||
check_resume_bbd: bool = True):
|
check_resume_bbd: bool = True,
|
||||||
|
slide: Optional[str] = None):
|
||||||
|
|
||||||
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
|
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
|
||||||
request_timeout, rate_controller)
|
request_timeout, rate_controller)
|
||||||
@ -207,6 +209,31 @@ class Instaloader:
|
|||||||
self.resume_prefix = resume_prefix
|
self.resume_prefix = resume_prefix
|
||||||
self.check_resume_bbd = check_resume_bbd
|
self.check_resume_bbd = check_resume_bbd
|
||||||
|
|
||||||
|
self.slide = slide or ""
|
||||||
|
self.slide_start = 0
|
||||||
|
self.slide_end = -1
|
||||||
|
if self.slide != "":
|
||||||
|
splitted = self.slide.split('-')
|
||||||
|
if len(splitted) == 1:
|
||||||
|
if splitted[0] == 'last':
|
||||||
|
# download only last image of a sidecar
|
||||||
|
self.slide_start = -1
|
||||||
|
else:
|
||||||
|
if int(splitted[0]) > 0:
|
||||||
|
self.slide_start = self.slide_end = int(splitted[0])-1
|
||||||
|
else:
|
||||||
|
raise InvalidArgumentException("--slide parameter must be greater than 0.")
|
||||||
|
elif len(splitted) == 2:
|
||||||
|
if splitted[1] == 'last':
|
||||||
|
self.slide_start = int(splitted[0])-1
|
||||||
|
elif 0 < int(splitted[0]) < int(splitted[1]):
|
||||||
|
self.slide_start = int(splitted[0])-1
|
||||||
|
self.slide_end = int(splitted[1])-1
|
||||||
|
else:
|
||||||
|
raise InvalidArgumentException("Invalid data for --slide parameter.")
|
||||||
|
else:
|
||||||
|
raise InvalidArgumentException("Invalid data for --slide parameter.")
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def anonymous_copy(self):
|
def anonymous_copy(self):
|
||||||
"""Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log."""
|
"""Yield an anonymous, otherwise equally-configured copy of an Instaloader instance; Then copy its error log."""
|
||||||
@ -228,7 +255,8 @@ class Instaloader:
|
|||||||
max_connection_attempts=self.context.max_connection_attempts,
|
max_connection_attempts=self.context.max_connection_attempts,
|
||||||
request_timeout=self.context.request_timeout,
|
request_timeout=self.context.request_timeout,
|
||||||
resume_prefix=self.resume_prefix,
|
resume_prefix=self.resume_prefix,
|
||||||
check_resume_bbd=self.check_resume_bbd)
|
check_resume_bbd=self.check_resume_bbd,
|
||||||
|
slide=self.slide)
|
||||||
yield new_loader
|
yield new_loader
|
||||||
self.context.error_log.extend(new_loader.context.error_log)
|
self.context.error_log.extend(new_loader.context.error_log)
|
||||||
new_loader.context.error_log = [] # avoid double-printing of errors
|
new_loader.context.error_log = [] # avoid double-printing of errors
|
||||||
@ -527,7 +555,10 @@ class Instaloader:
|
|||||||
downloaded = True
|
downloaded = True
|
||||||
if post.typename == 'GraphSidecar':
|
if post.typename == 'GraphSidecar':
|
||||||
if self.download_pictures or self.download_videos:
|
if self.download_pictures or self.download_videos:
|
||||||
for edge_number, sidecar_node in enumerate(post.get_sidecar_nodes(), start=1):
|
for edge_number, sidecar_node in enumerate(
|
||||||
|
post.get_sidecar_nodes(self.slide_start, self.slide_end),
|
||||||
|
start=post.mediacount if self.slide_start < 0 else self.slide_start + 1
|
||||||
|
):
|
||||||
if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails):
|
if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails):
|
||||||
suffix = str(edge_number)
|
suffix = str(edge_number)
|
||||||
if '{filename}' in self.filename_pattern:
|
if '{filename}' in self.filename_pattern:
|
||||||
|
@ -252,26 +252,49 @@ class Post:
|
|||||||
"""Type of post, GraphImage, GraphVideo or GraphSidecar"""
|
"""Type of post, GraphImage, GraphVideo or GraphSidecar"""
|
||||||
return self._field('__typename')
|
return self._field('__typename')
|
||||||
|
|
||||||
def get_sidecar_nodes(self) -> Iterator[PostSidecarNode]:
|
@property
|
||||||
"""Sidecar nodes of a Post with typename==GraphSidecar."""
|
def mediacount(self) -> int:
|
||||||
|
"""
|
||||||
|
The number of media in a sidecar Post, or 1 if the Post is not a sidecar.
|
||||||
|
|
||||||
|
.. versionadded:: 4.6
|
||||||
|
"""
|
||||||
|
if self.typename == 'GraphSidecar':
|
||||||
|
edges = self._field('edge_sidecar_to_children', 'edges')
|
||||||
|
return len(edges)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
def get_sidecar_nodes(self, start=0, end=-1) -> Iterator[PostSidecarNode]:
|
||||||
|
"""
|
||||||
|
Sidecar nodes of a Post with typename==GraphSidecar.
|
||||||
|
|
||||||
|
.. versionchanged:: 4.6
|
||||||
|
Added parameters *start* and *end* to specify a slice of sidecar media.
|
||||||
|
"""
|
||||||
if self.typename == 'GraphSidecar':
|
if self.typename == 'GraphSidecar':
|
||||||
edges = self._field('edge_sidecar_to_children', 'edges')
|
edges = self._field('edge_sidecar_to_children', 'edges')
|
||||||
if any(edge['node']['is_video'] for edge in edges):
|
if any(edge['node']['is_video'] for edge in edges):
|
||||||
# video_url is only present in full metadata, issue #558.
|
# video_url is only present in full metadata, issue #558.
|
||||||
edges = self._full_metadata['edge_sidecar_to_children']['edges']
|
edges = self._full_metadata['edge_sidecar_to_children']['edges']
|
||||||
|
if end < 0:
|
||||||
|
end = len(edges)-1
|
||||||
|
if start < 0:
|
||||||
|
start = len(edges)-1
|
||||||
for idx, edge in enumerate(edges):
|
for idx, edge in enumerate(edges):
|
||||||
node = edge['node']
|
if start <= idx <= end:
|
||||||
is_video = node['is_video']
|
node = edge['node']
|
||||||
display_url = node['display_url']
|
is_video = node['is_video']
|
||||||
if not is_video and self._context.is_logged_in:
|
display_url = node['display_url']
|
||||||
try:
|
if not is_video and self._context.is_logged_in:
|
||||||
carousel_media = self._iphone_struct['carousel_media']
|
try:
|
||||||
orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url']
|
carousel_media = self._iphone_struct['carousel_media']
|
||||||
display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url)
|
orig_url = carousel_media[idx]['image_versions2']['candidates'][0]['url']
|
||||||
except (InstaloaderException, KeyError, IndexError) as err:
|
display_url = re.sub(r'&se=\d+(&?)', r'\1', orig_url)
|
||||||
self._context.error('{} Unable to fetch high quality image version of {}.'.format(err, self))
|
except (InstaloaderException, KeyError, IndexError) as err:
|
||||||
yield PostSidecarNode(is_video=is_video, display_url=display_url,
|
self._context.error('{} Unable to fetch high quality image version of {}.'.format(
|
||||||
video_url=node['video_url'] if is_video else None)
|
err, self))
|
||||||
|
yield PostSidecarNode(is_video=is_video, display_url=display_url,
|
||||||
|
video_url=node['video_url'] if is_video else None)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def caption(self) -> Optional[str]:
|
def caption(self) -> Optional[str]:
|
||||||
|
Loading…
Reference in New Issue
Block a user