Add option --title-pattern for profile pics and covers (#1127)

This commit is contained in:
Eduardo Kalinowski
2021-05-15 11:25:36 -03:00
committed by GitHub
parent a2715cbe03
commit 5345470ebf
5 changed files with 113 additions and 22 deletions

View File

@@ -359,10 +359,15 @@ def main():
'{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the '
'profile name. Defaults to \'{target}\'.')
g_how.add_argument('--filename-pattern',
help='Prefix of filenames, relative to the directory given with '
help='Prefix of filenames for posts and stories, relative to the directory given with '
'--dirname-pattern. {profile} is replaced by the profile name,'
'{target} is replaced by the target you specified, i.e. either :feed'
'#hashtag or the profile name. Defaults to \'{date_utc}_UTC\'')
g_how.add_argument('--title-pattern',
help='Prefix of filenames for profile pics, hashtag profile pics, and highlight covers. '
'Defaults to \'{date_utc}_UTC_{typename}\' if --dirname-pattern contains \'{target}\' '
'or \'{dirname}\', or if --dirname-pattern is not specified. Otherwise defaults to '
'\'{target}_{date_utc}_UTC_{typename}\'.')
g_how.add_argument('--resume-prefix', metavar='PREFIX',
help='Prefix for filenames that are used to save the information to resume an interrupted '
'download.')
@@ -445,7 +450,8 @@ def main():
check_resume_bbd=not args.use_aged_resume_files,
slide=args.slide,
fatal_status_codes=args.abort_on,
iphone_support=not args.no_iphone)
iphone_support=not args.no_iphone,
title_pattern=args.title_pattern)
_main(loader,
args.profile,
username=args.login.lower() if args.login is not None else None,

View File

@@ -10,7 +10,6 @@ import tempfile
from contextlib import contextmanager, suppress
from datetime import datetime, timezone
from functools import wraps
from hashlib import md5
from io import BytesIO
from pathlib import Path
from typing import Any, Callable, IO, Iterator, List, Optional, Set, Union, cast
@@ -23,7 +22,7 @@ from .exceptions import *
from .instaloadercontext import InstaloaderContext, RateController
from .nodeiterator import NodeIterator, resumable_iteration
from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
load_structure_from_file, save_structure_to_file, PostSidecarNode)
load_structure_from_file, save_structure_to_file, PostSidecarNode, TitlePic)
def get_default_session_filename(username: str) -> str:
@@ -102,7 +101,7 @@ class _ArbitraryItemFormatter(string.Formatter):
def get_value(self, key, args, kwargs):
"""Override to substitute {ATTRIBUTE} by attributes of our _item."""
if key == 'filename' and isinstance(self._item, (Post, StoryItem, PostSidecarNode)):
if key == 'filename' and isinstance(self._item, (Post, StoryItem, PostSidecarNode, TitlePic)):
return "{filename}"
if hasattr(self._item, key):
return getattr(self._item, key)
@@ -144,6 +143,9 @@ class Instaloader:
:param user_agent: :option:`--user-agent`
:param dirname_pattern: :option:`--dirname-pattern`, default is ``{target}``
:param filename_pattern: :option:`--filename-pattern`, default is ``{date_utc}_UTC``
:param title_pattern:
:option:`--title-pattern`, default is ``{date_utc}_UTC_{typename}`` if ``dirname_pattern`` contains
``{target}`` or ``{profile}``, ``{target}_{date_utc}_UTC_{typename}`` otherwise.
:param download_pictures: not :option:`--no-pictures`
:param download_videos: not :option:`--no-videos`
:param download_video_thumbnails: not :option:`--no-video-thumbnails`
@@ -191,7 +193,8 @@ class Instaloader:
check_resume_bbd: bool = True,
slide: Optional[str] = None,
fatal_status_codes: Optional[List[int]] = None,
iphone_support: bool = True):
iphone_support: bool = True,
title_pattern: Optional[str] = None):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
request_timeout, rate_controller, fatal_status_codes,
@@ -200,6 +203,14 @@ class Instaloader:
# configuration parameters
self.dirname_pattern = dirname_pattern or "{target}"
self.filename_pattern = filename_pattern or "{date_utc}_UTC"
if title_pattern is not None:
self.title_pattern = title_pattern
else:
if (format_string_contains_key(self.dirname_pattern, 'profile') or
format_string_contains_key(self.dirname_pattern, 'target')):
self.title_pattern = '{date_utc}_UTC_{typename}'
else:
self.title_pattern = '{target}_{date_utc}_UTC_{typename}'
self.download_pictures = download_pictures
self.download_videos = download_videos
self.download_video_thumbnails = download_video_thumbnails
@@ -460,25 +471,26 @@ class Instaloader:
.. versionadded:: 4.3"""
def _epoch_to_string(epoch: datetime) -> str:
return epoch.strftime('%Y-%m-%d_%H-%M-%S_UTC')
http_response = self.context.get_raw(url)
date_object = None # type: Optional[datetime]
if 'Last-Modified' in http_response.headers:
date_object = datetime.strptime(http_response.headers["Last-Modified"], '%a, %d %b %Y %H:%M:%S GMT')
date_object = date_object.replace(tzinfo=timezone.utc)
pic_bytes = None
pic_identifier = _epoch_to_string(date_object)
else:
pic_bytes = http_response.content
pic_identifier = md5(pic_bytes).hexdigest()[:16]
filename = self.format_filename_within_target_path(target, owner_profile, pic_identifier, name_suffix, 'jpg')
ig_filename = url.split('/')[-1].split('?')[0]
pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object)
dirname = _PostPathFormatter(pic_data).format(self.dirname_pattern, target=target)
filename_template = os.path.join(dirname,
_PostPathFormatter(pic_data).format(self.title_pattern, target=target))
filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg"
content_length = http_response.headers.get('Content-Length', None)
if os.path.isfile(filename) and (not self.context.is_logged_in or
(content_length is not None and
os.path.getsize(filename) >= int(content_length))):
self.context.log(filename + ' already exists')
return None
return
os.makedirs(os.path.dirname(filename), exist_ok=True)
self.context.write_raw(pic_bytes if pic_bytes else http_response, filename)
if date_object:
@@ -572,7 +584,8 @@ class Instaloader:
os.makedirs(os.path.dirname(filename), exist_ok=True)
return filename
def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode], target: Optional[Union[str, Path]] = None):
def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode, TitlePic],
target: Optional[Union[str, Path]] = None):
"""Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter.
.. versionadded:: 4.1"""

View File

@@ -4,6 +4,7 @@ import re
from base64 import b64decode, b64encode
from collections import namedtuple
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator, List, Optional, Union
from . import __version__
@@ -1537,6 +1538,52 @@ class TopSearchResults:
return self._searchstring
class TitlePic:
def __init__(self, profile: Optional[Profile], target: Union[str, Path], typename: str,
filename: str, date_utc: Optional[datetime]):
self._profile = profile
self._target = target
self._typename = typename
self._filename = filename
self._date_utc = date_utc
@property
def profile(self) -> Union[str, Path]:
return self._profile.username.lower() if self._profile is not None else self._target
@property
def owner_username(self) -> Union[str, Path]:
return self.profile
@property
def owner_id(self) -> Union[str, Path]:
return str(self._profile.userid) if self._profile is not None else self._target
@property
def target(self) -> Union[str, Path]:
return self._target
@property
def typename(self) -> str:
return self._typename
@property
def filename(self) -> str:
return self._filename
@property
def date_utc(self) -> Optional[datetime]:
return self._date_utc
@property
def date(self) -> Optional[datetime]:
return self.date_utc
@property
def date_local(self) -> Optional[datetime]:
return self._date_utc.astimezone() if self._date_utc is not None else None
JsonExportable = Union[Post, Profile, StoryItem, Hashtag, FrozenNodeIterator]