diff --git a/docs/basic-usage.rst b/docs/basic-usage.rst index dba7f02..fb437a2 100644 --- a/docs/basic-usage.rst +++ b/docs/basic-usage.rst @@ -149,27 +149,37 @@ target. The default is ``--dirname-pattern={target}``. In the dirname pattern, the token ``{target}`` is replaced by the target name, and ``{profile}`` is replaced by the owner of the post which is downloaded. -:option:`--filename-pattern` configures the path of the post's files relative +:option:`--filename-pattern` configures the path of post and story files relative to the target directory that is specified with :option:`--dirname-pattern`. The default is ``--filename-pattern={date_utc}_UTC``. The tokens ``{target}`` and ``{profile}`` are replaced like in the -dirname pattern. The following tokens are defined for usage with -:option:`--filename-pattern`: +dirname pattern. + +:option:`--title-pattern` is similar to :option:`--filename-pattern`, but for profile +pics, hashtag profile pics, and highlight covers. The default is +``{date_utc}_UTC_{typename}`` if :option:`--dirname-pattern` contains ``{target}`` or +``{profile}``, or ``{target}_{date_utc}_UTC_{typename}`` if it does not. Some tokens +are not supported for this option; see below for details. + +The following tokens are defined for usage with +:option:`--filename-pattern` and :option:`--title-pattern`: - ``{target}`` Target name (as given in Instaloader command line) - ``{profile}`` (same as ``{owner_username}``) - Owner of the Post / StoryItem. + Owner of the Post / StoryItem / ProfilePic. For hashtag profile pics and + highlight covers, equivalent to ``{target}``. - ``{owner_id}`` - Unique integer ID of owner profile. + Unique integer ID of owner profile. For hashtag profile pics, equivalent to + ``{target}``. - ``{shortcode}`` - Shortcode (identifier string). + Shortcode (identifier string). Not available for :option:`--title-pattern`. - ``{mediaid}`` - Integer representation of shortcode. 
Not available for :option:`--title-pattern`. - ``{filename}`` Instagram's internal filename. @@ -182,6 +192,10 @@ dirname pattern. The following tokens are defined for usage with {date_utc:%Y-%m-%d_%H-%M-%S} +- ``{typename}`` + Type of media being saved, such as GraphImage, GraphStoryVideo, profile_pic, + etc. + For example, encode the poster's profile name in the filenames with:: instaloader --filename-pattern={date_utc}_UTC_{profile} "#hashtag" diff --git a/docs/cli-options.rst b/docs/cli-options.rst index d40d113..f54acf2 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -205,12 +205,23 @@ How to Download .. option:: --filename-pattern FILENAME_PATTERN - Prefix of filenames, relative to the directory given with + Prefix of filenames for posts and stories, relative to the directory given with :option:`--dirname-pattern`. ``{profile}`` is replaced by the profile name, ``{target}`` is replaced by the target you specified, i.e. either ``:feed``, ``#hashtag`` or the profile name. Defaults to ``{date_utc}_UTC``. See :ref:`filename-specification` for a list of supported tokens. +.. option:: --title-pattern TITLE_PATTERN + + Prefix of filenames for profile pics, hashtag profile pics, and highlight + covers, relative to the directory given with :option:`--dirname-pattern`. + Defaults to ``{date_utc}_UTC_{typename}`` if :option:`--dirname-pattern` + contains ``{target}`` or ``{profile}``, otherwise defaults to + ``{target}_{date_utc}_UTC_{typename}``. + See :ref:`filename-specification` for a list of supported tokens. + + .. versionadded:: 4.8 + .. option:: --resume-prefix prefix For many targets, Instaloader is capable of resuming a previously-aborted diff --git a/instaloader/__main__.py b/instaloader/__main__.py index da4ede1..b220906 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -359,10 +359,15 @@ def main(): '{target} is replaced by the target you specified, i.e. either :feed, #hashtag or the ' 'profile name. 
Defaults to \'{target}\'.') g_how.add_argument('--filename-pattern', - help='Prefix of filenames, relative to the directory given with ' + help='Prefix of filenames for posts and stories, relative to the directory given with ' '--dirname-pattern. {profile} is replaced by the profile name,' '{target} is replaced by the target you specified, i.e. either :feed' '#hashtag or the profile name. Defaults to \'{date_utc}_UTC\'') + g_how.add_argument('--title-pattern', + help='Prefix of filenames for profile pics, hashtag profile pics, and highlight covers. ' + 'Defaults to \'{date_utc}_UTC_{typename}\' if --dirname-pattern contains \'{target}\' ' + 'or \'{profile}\', or if --dirname-pattern is not specified. Otherwise defaults to ' + '\'{target}_{date_utc}_UTC_{typename}\'.') g_how.add_argument('--resume-prefix', metavar='PREFIX', help='Prefix for filenames that are used to save the information to resume an interrupted ' 'download.') @@ -445,7 +450,8 @@ def main(): check_resume_bbd=not args.use_aged_resume_files, slide=args.slide, fatal_status_codes=args.abort_on, - iphone_support=not args.no_iphone) + iphone_support=not args.no_iphone, + title_pattern=args.title_pattern) _main(loader, args.profile, username=args.login.lower() if args.login is not None else None, diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index bf658c3..357c4f5 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -10,7 +10,6 @@ import tempfile from contextlib import contextmanager, suppress from datetime import datetime, timezone from functools import wraps -from hashlib import md5 from io import BytesIO from pathlib import Path from typing import Any, Callable, IO, Iterator, List, Optional, Set, Union, cast @@ -23,7 +22,7 @@ from .exceptions import * from .instaloadercontext import InstaloaderContext, RateController from .nodeiterator import NodeIterator, resumable_iteration from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, 
Profile, Story, StoryItem, - load_structure_from_file, save_structure_to_file, PostSidecarNode) + load_structure_from_file, save_structure_to_file, PostSidecarNode, TitlePic) def get_default_session_filename(username: str) -> str: @@ -102,7 +101,7 @@ class _ArbitraryItemFormatter(string.Formatter): def get_value(self, key, args, kwargs): """Override to substitute {ATTRIBUTE} by attributes of our _item.""" - if key == 'filename' and isinstance(self._item, (Post, StoryItem, PostSidecarNode)): + if key == 'filename' and isinstance(self._item, (Post, StoryItem, PostSidecarNode, TitlePic)): return "{filename}" if hasattr(self._item, key): return getattr(self._item, key) @@ -144,6 +143,9 @@ class Instaloader: :param user_agent: :option:`--user-agent` :param dirname_pattern: :option:`--dirname-pattern`, default is ``{target}`` :param filename_pattern: :option:`--filename-pattern`, default is ``{date_utc}_UTC`` + :param title_pattern: + :option:`--title-pattern`, default is ``{date_utc}_UTC_{typename}`` if ``dirname_pattern`` contains + ``{target}`` or ``{profile}``, ``{target}_{date_utc}_UTC_{typename}`` otherwise. 
:param download_pictures: not :option:`--no-pictures` :param download_videos: not :option:`--no-videos` :param download_video_thumbnails: not :option:`--no-video-thumbnails` @@ -191,7 +193,8 @@ class Instaloader: check_resume_bbd: bool = True, slide: Optional[str] = None, fatal_status_codes: Optional[List[int]] = None, - iphone_support: bool = True): + iphone_support: bool = True, + title_pattern: Optional[str] = None): self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, request_timeout, rate_controller, fatal_status_codes, @@ -200,6 +203,14 @@ class Instaloader: # configuration parameters self.dirname_pattern = dirname_pattern or "{target}" self.filename_pattern = filename_pattern or "{date_utc}_UTC" + if title_pattern is not None: + self.title_pattern = title_pattern + else: + if (format_string_contains_key(self.dirname_pattern, 'profile') or + format_string_contains_key(self.dirname_pattern, 'target')): + self.title_pattern = '{date_utc}_UTC_{typename}' + else: + self.title_pattern = '{target}_{date_utc}_UTC_{typename}' self.download_pictures = download_pictures self.download_videos = download_videos self.download_video_thumbnails = download_video_thumbnails @@ -460,25 +471,26 @@ class Instaloader: .. 
versionadded:: 4.3""" - def _epoch_to_string(epoch: datetime) -> str: - return epoch.strftime('%Y-%m-%d_%H-%M-%S_UTC') - http_response = self.context.get_raw(url) date_object = None # type: Optional[datetime] if 'Last-Modified' in http_response.headers: date_object = datetime.strptime(http_response.headers["Last-Modified"], '%a, %d %b %Y %H:%M:%S GMT') + date_object = date_object.replace(tzinfo=timezone.utc) pic_bytes = None - pic_identifier = _epoch_to_string(date_object) else: pic_bytes = http_response.content - pic_identifier = md5(pic_bytes).hexdigest()[:16] - filename = self.format_filename_within_target_path(target, owner_profile, pic_identifier, name_suffix, 'jpg') + ig_filename = url.split('/')[-1].split('?')[0] + pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object) + dirname = _PostPathFormatter(pic_data).format(self.dirname_pattern, target=target) + filename_template = os.path.join(dirname, + _PostPathFormatter(pic_data).format(self.title_pattern, target=target)) + filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg" content_length = http_response.headers.get('Content-Length', None) if os.path.isfile(filename) and (not self.context.is_logged_in or (content_length is not None and os.path.getsize(filename) >= int(content_length))): self.context.log(filename + ' already exists') - return None + return os.makedirs(os.path.dirname(filename), exist_ok=True) self.context.write_raw(pic_bytes if pic_bytes else http_response, filename) if date_object: @@ -572,7 +584,8 @@ class Instaloader: os.makedirs(os.path.dirname(filename), exist_ok=True) return filename - def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode], target: Optional[Union[str, Path]] = None): + def format_filename(self, item: Union[Post, StoryItem, PostSidecarNode, TitlePic], + target: Optional[Union[str, Path]] = None): """Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter. .. 
versionadded:: 4.1""" diff --git a/instaloader/structures.py b/instaloader/structures.py index b4b3a0d..2df2852 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -4,6 +4,7 @@ import re from base64 import b64decode, b64encode from collections import namedtuple from datetime import datetime +from pathlib import Path from typing import Any, Dict, Iterable, Iterator, List, Optional, Union from . import __version__ @@ -1537,6 +1538,52 @@ class TopSearchResults: return self._searchstring +class TitlePic: + def __init__(self, profile: Optional[Profile], target: Union[str, Path], typename: str, + filename: str, date_utc: Optional[datetime]): + self._profile = profile + self._target = target + self._typename = typename + self._filename = filename + self._date_utc = date_utc + + @property + def profile(self) -> Union[str, Path]: + return self._profile.username.lower() if self._profile is not None else self._target + + @property + def owner_username(self) -> Union[str, Path]: + return self.profile + + @property + def owner_id(self) -> Union[str, Path]: + return str(self._profile.userid) if self._profile is not None else self._target + + @property + def target(self) -> Union[str, Path]: + return self._target + + @property + def typename(self) -> str: + return self._typename + + @property + def filename(self) -> str: + return self._filename + + @property + def date_utc(self) -> Optional[datetime]: + return self._date_utc + + @property + def date(self) -> Optional[datetime]: + return self.date_utc + + @property + def date_local(self) -> Optional[datetime]: + return self._date_utc.astimezone() if self._date_utc is not None else None + + JsonExportable = Union[Post, Profile, StoryItem, Hashtag, FrozenNodeIterator]