Add --sanitize-paths option (#1452)
This commit is contained in:
parent
37a93ee59a
commit
604b107586
@ -239,6 +239,13 @@ How to Download
|
||||
|
||||
.. versionadded:: 4.8
|
||||
|
||||
.. option:: --sanitize-paths
|
||||
|
||||
Force sanitization of paths so that the resulting file and directory names
|
||||
are valid on both Windows and Unix.
|
||||
|
||||
.. versionadded:: 4.9
|
||||
|
||||
.. option:: --resume-prefix prefix
|
||||
|
||||
For many targets, Instaloader is capable of resuming a previously-aborted
|
||||
|
@ -383,6 +383,9 @@ def main():
|
||||
g_how.add_argument('--resume-prefix', metavar='PREFIX',
|
||||
help='Prefix for filenames that are used to save the information to resume an interrupted '
|
||||
'download.')
|
||||
g_how.add_argument('--sanitize-paths', action='store_true',
|
||||
help='Sanitize paths so that the resulting file and directory names are valid on both '
|
||||
'Windows and Unix.')
|
||||
g_how.add_argument('--no-resume', action='store_true',
|
||||
help='Do not resume a previously-aborted download iteration, and do not save such information '
|
||||
'when interrupted.')
|
||||
@ -463,7 +466,8 @@ def main():
|
||||
slide=args.slide,
|
||||
fatal_status_codes=args.abort_on,
|
||||
iphone_support=not args.no_iphone,
|
||||
title_pattern=args.title_pattern)
|
||||
title_pattern=args.title_pattern,
|
||||
sanitize_paths=args.sanitize_paths)
|
||||
_main(loader,
|
||||
args.profile,
|
||||
username=args.login.lower() if args.login is not None else None,
|
||||
|
@ -137,24 +137,38 @@ class _ArbitraryItemFormatter(string.Formatter):
|
||||
|
||||
|
||||
class _PostPathFormatter(_ArbitraryItemFormatter):
    """Formatter for directory/file name patterns that sanitizes substituted strings.

    Every string value looked up for a pattern field is passed through
    :meth:`sanitize_path`; non-string values are returned untouched.

    :param item: Object whose data is used to fill in the pattern fields
       (forwarded to the parent formatter).
    :param force_windows_path: If True, apply the Windows-specific replacements
       regardless of the platform the program is running on.
    """

    # Base names that get a trailing '_' appended by sanitize_path() when the
    # Windows rules are active (device names that Windows does not accept as
    # regular file names).
    RESERVED: set = {'CON', 'PRN', 'AUX', 'NUL',
                     'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
                     'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'}

    # Single-pass translation table for the characters replaced under the
    # Windows rules. No replacement character is itself a key of the table, so
    # one translate() call is equivalent to replacing them one after another.
    _WINDOWS_TRANSLATION = str.maketrans({
        ':': '\uff1a',
        '<': '\ufe64',
        '>': '\ufe65',
        '"': '\uff02',
        '\\': '\ufe68',
        '|': '\uff5c',
        '?': '\ufe16',
        '*': '\uff0a',
        '\n': ' ',
        '\r': ' ',
    })

    def __init__(self, item: Any, force_windows_path: bool = False):
        super().__init__(item)
        self.force_windows_path = force_windows_path

    def get_value(self, key, args, kwargs):
        """Look up a pattern field and sanitize the result if it is a string."""
        ret = super().get_value(key, args, kwargs)
        if not isinstance(ret, str):
            return ret
        return self.sanitize_path(ret, self.force_windows_path)

    @staticmethod
    def sanitize_path(ret: str, force_windows_path: bool = False) -> str:
        """Replaces '/' with similar looking Division Slash and some other illegal filename characters on Windows.

        :param ret: A single path component (file or directory name) to sanitize.
        :param force_windows_path: Apply the Windows-specific replacements even
           when not running on Windows.
        :return: The sanitized path component.
        """
        # Always applied: a '/' would otherwise split the component into
        # several path segments.
        ret = ret.replace('/', '\u2215')

        # Always applied: only the first character is touched when the name
        # starts with a dot.
        if ret.startswith('.'):
            ret = ret.replace('.', '\u2024', 1)

        if force_windows_path or platform.system() == 'Windows':
            ret = ret.translate(_PostPathFormatter._WINDOWS_TRANSLATION)
            root, ext = os.path.splitext(ret)
            # Comparison is case-insensitive and looks at the name without its
            # last extension, so e.g. 'con.txt' becomes 'con_.txt'.
            if root.upper() in _PostPathFormatter.RESERVED:
                root += '_'
            # A bare trailing dot is swapped for U+2024 so the name does not
            # end in '.'.
            if ext == '.':
                ext = '\u2024'
            ret = root + ext
        return ret
|
||||
|
||||
|
||||
@ -187,6 +201,7 @@ class Instaloader:
|
||||
:param slide: :option:`--slide`
|
||||
:param fatal_status_codes: :option:`--abort-on`
|
||||
:param iphone_support: not :option:`--no-iphone`
|
||||
:param sanitize_paths: :option:`--sanitize-paths`
|
||||
|
||||
.. attribute:: context
|
||||
|
||||
@ -216,7 +231,8 @@ class Instaloader:
|
||||
slide: Optional[str] = None,
|
||||
fatal_status_codes: Optional[List[int]] = None,
|
||||
iphone_support: bool = True,
|
||||
title_pattern: Optional[str] = None):
|
||||
title_pattern: Optional[str] = None,
|
||||
sanitize_paths: bool = False):
|
||||
|
||||
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
|
||||
request_timeout, rate_controller, fatal_status_codes,
|
||||
@ -233,6 +249,7 @@ class Instaloader:
|
||||
self.title_pattern = '{date_utc}_UTC_{typename}'
|
||||
else:
|
||||
self.title_pattern = '{target}_{date_utc}_UTC_{typename}'
|
||||
self.sanitize_paths = sanitize_paths
|
||||
self.download_pictures = download_pictures
|
||||
self.download_videos = download_videos
|
||||
self.download_video_thumbnails = download_video_thumbnails
|
||||
@ -296,7 +313,8 @@ class Instaloader:
|
||||
check_resume_bbd=self.check_resume_bbd,
|
||||
slide=self.slide,
|
||||
fatal_status_codes=self.context.fatal_status_codes,
|
||||
iphone_support=self.context.iphone_support)
|
||||
iphone_support=self.context.iphone_support,
|
||||
sanitize_paths=self.sanitize_paths)
|
||||
yield new_loader
|
||||
self.context.error_log.extend(new_loader.context.error_log)
|
||||
new_loader.context.error_log = [] # avoid double-printing of errors
|
||||
@ -506,9 +524,10 @@ class Instaloader:
|
||||
pic_bytes = http_response.content
|
||||
ig_filename = url.split('/')[-1].split('?')[0]
|
||||
pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object)
|
||||
dirname = _PostPathFormatter(pic_data).format(self.dirname_pattern, target=target)
|
||||
filename_template = os.path.join(dirname,
|
||||
_PostPathFormatter(pic_data).format(self.title_pattern, target=target))
|
||||
dirname = _PostPathFormatter(pic_data, self.sanitize_paths).format(self.dirname_pattern, target=target)
|
||||
filename_template = os.path.join(
|
||||
dirname,
|
||||
_PostPathFormatter(pic_data, self.sanitize_paths).format(self.title_pattern, target=target))
|
||||
filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg"
|
||||
content_length = http_response.headers.get('Content-Length', None)
|
||||
if os.path.isfile(filename) and (not self.context.is_logged_in or
|
||||
@ -633,7 +652,7 @@ class Instaloader:
|
||||
"""Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter.
|
||||
|
||||
.. versionadded:: 4.1"""
|
||||
return _PostPathFormatter(item).format(self.filename_pattern, target=target)
|
||||
return _PostPathFormatter(item, self.sanitize_paths).format(self.filename_pattern, target=target)
|
||||
|
||||
def download_post(self, post: Post, target: Union[str, Path]) -> bool:
|
||||
"""
|
||||
@ -665,7 +684,7 @@ class Instaloader:
|
||||
return False
|
||||
return True
|
||||
|
||||
dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
|
||||
dirname = _PostPathFormatter(post, self.sanitize_paths).format(self.dirname_pattern, target=target)
|
||||
filename_template = os.path.join(dirname, self.format_filename(post, target=target))
|
||||
filename = self.__prepare_filename(filename_template, lambda: post.url)
|
||||
|
||||
@ -846,7 +865,7 @@ class Instaloader:
|
||||
return True
|
||||
|
||||
date_local = item.date_local
|
||||
dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
|
||||
dirname = _PostPathFormatter(item, self.sanitize_paths).format(self.dirname_pattern, target=target)
|
||||
filename_template = os.path.join(dirname, self.format_filename(item, target=target))
|
||||
filename = self.__prepare_filename(filename_template, lambda: item.url)
|
||||
downloaded = False
|
||||
@ -914,8 +933,9 @@ class Instaloader:
|
||||
name = user_highlight.owner_username
|
||||
highlight_target = (filename_target
|
||||
if filename_target
|
||||
else (Path(_PostPathFormatter.sanitize_path(name)) /
|
||||
_PostPathFormatter.sanitize_path(user_highlight.title))) # type: Union[str, Path]
|
||||
else (Path(_PostPathFormatter.sanitize_path(name, self.sanitize_paths)) /
|
||||
_PostPathFormatter.sanitize_path(user_highlight.title,
|
||||
self.sanitize_paths))) # type: Union[str, Path]
|
||||
self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name))
|
||||
self.download_highlight_cover(user_highlight, highlight_target)
|
||||
totalcount = user_highlight.itemcount
|
||||
@ -965,7 +985,7 @@ class Instaloader:
|
||||
else total_count)
|
||||
sanitized_target = target
|
||||
if isinstance(target, str):
|
||||
sanitized_target = _PostPathFormatter.sanitize_path(target)
|
||||
sanitized_target = _PostPathFormatter.sanitize_path(target, self.sanitize_paths)
|
||||
if takewhile is None:
|
||||
takewhile = lambda _: True
|
||||
with resumable_iteration(
|
||||
@ -1209,8 +1229,8 @@ class Instaloader:
|
||||
tagged_posts = profile.get_tagged_posts()
|
||||
self.posts_download_loop(tagged_posts,
|
||||
target if target
|
||||
else (Path(_PostPathFormatter.sanitize_path(profile.username)) /
|
||||
_PostPathFormatter.sanitize_path(':tagged')),
|
||||
else (Path(_PostPathFormatter.sanitize_path(profile.username, self.sanitize_paths)) /
|
||||
_PostPathFormatter.sanitize_path(':tagged', self.sanitize_paths)),
|
||||
fast_update, post_filter, takewhile=posts_takewhile)
|
||||
if latest_stamps is not None and tagged_posts.first_item is not None:
|
||||
latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local)
|
||||
|
Loading…
Reference in New Issue
Block a user