Add --sanitize-paths option (#1452)
This commit is contained in:
parent
37a93ee59a
commit
604b107586
@ -239,6 +239,13 @@ How to Download
|
|||||||
|
|
||||||
.. versionadded:: 4.8
|
.. versionadded:: 4.8
|
||||||
|
|
||||||
|
.. option:: --sanitize-paths
|
||||||
|
|
||||||
|
Force sanitization of paths so that the resulting file and directory names
|
||||||
|
are valid on both Windows and Unix.
|
||||||
|
|
||||||
|
.. versionadded:: 4.9
|
||||||
|
|
||||||
.. option:: --resume-prefix prefix
|
.. option:: --resume-prefix prefix
|
||||||
|
|
||||||
For many targets, Instaloader is capable of resuming a previously-aborted
|
For many targets, Instaloader is capable of resuming a previously-aborted
|
||||||
|
@ -383,6 +383,9 @@ def main():
|
|||||||
g_how.add_argument('--resume-prefix', metavar='PREFIX',
|
g_how.add_argument('--resume-prefix', metavar='PREFIX',
|
||||||
help='Prefix for filenames that are used to save the information to resume an interrupted '
|
help='Prefix for filenames that are used to save the information to resume an interrupted '
|
||||||
'download.')
|
'download.')
|
||||||
|
g_how.add_argument('--sanitize-paths', action='store_true',
|
||||||
|
help='Sanitize paths so that the resulting file and directory names are valid on both '
|
||||||
|
'Windows and Unix.')
|
||||||
g_how.add_argument('--no-resume', action='store_true',
|
g_how.add_argument('--no-resume', action='store_true',
|
||||||
help='Do not resume a previously-aborted download iteration, and do not save such information '
|
help='Do not resume a previously-aborted download iteration, and do not save such information '
|
||||||
'when interrupted.')
|
'when interrupted.')
|
||||||
@ -463,7 +466,8 @@ def main():
|
|||||||
slide=args.slide,
|
slide=args.slide,
|
||||||
fatal_status_codes=args.abort_on,
|
fatal_status_codes=args.abort_on,
|
||||||
iphone_support=not args.no_iphone,
|
iphone_support=not args.no_iphone,
|
||||||
title_pattern=args.title_pattern)
|
title_pattern=args.title_pattern,
|
||||||
|
sanitize_paths=args.sanitize_paths)
|
||||||
_main(loader,
|
_main(loader,
|
||||||
args.profile,
|
args.profile,
|
||||||
username=args.login.lower() if args.login is not None else None,
|
username=args.login.lower() if args.login is not None else None,
|
||||||
|
@ -137,24 +137,38 @@ class _ArbitraryItemFormatter(string.Formatter):
|
|||||||
|
|
||||||
|
|
||||||
class _PostPathFormatter(_ArbitraryItemFormatter):
|
class _PostPathFormatter(_ArbitraryItemFormatter):
|
||||||
|
RESERVED: set = {'CON', 'PRN', 'AUX', 'NUL',
|
||||||
|
'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
|
||||||
|
'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'}
|
||||||
|
|
||||||
|
def __init__(self, item: Any, force_windows_path: bool = False):
|
||||||
|
super().__init__(item)
|
||||||
|
self.force_windows_path = force_windows_path
|
||||||
|
|
||||||
def get_value(self, key, args, kwargs):
|
def get_value(self, key, args, kwargs):
|
||||||
ret = super().get_value(key, args, kwargs)
|
ret = super().get_value(key, args, kwargs)
|
||||||
if not isinstance(ret, str):
|
if not isinstance(ret, str):
|
||||||
return ret
|
return ret
|
||||||
return self.sanitize_path(ret)
|
return self.sanitize_path(ret, self.force_windows_path)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def sanitize_path(ret: str) -> str:
|
def sanitize_path(ret: str, force_windows_path: bool = False) -> str:
|
||||||
"""Replaces '/' with similar looking Division Slash and some other illegal filename characters on Windows."""
|
"""Replaces '/' with similar looking Division Slash and some other illegal filename characters on Windows."""
|
||||||
ret = ret.replace('/', '\u2215')
|
ret = ret.replace('/', '\u2215')
|
||||||
|
|
||||||
if ret.startswith('.'):
|
if ret.startswith('.'):
|
||||||
ret = ret.replace('.', '\u2024', 1)
|
ret = ret.replace('.', '\u2024', 1)
|
||||||
|
|
||||||
if platform.system() == 'Windows':
|
if force_windows_path or platform.system() == 'Windows':
|
||||||
ret = ret.replace(':', '\uff1a').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02')
|
ret = ret.replace(':', '\uff1a').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02')
|
||||||
ret = ret.replace('\\', '\ufe68').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a')
|
ret = ret.replace('\\', '\ufe68').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a')
|
||||||
ret = ret.replace('\n', ' ').replace('\r', ' ')
|
ret = ret.replace('\n', ' ').replace('\r', ' ')
|
||||||
|
root, ext = os.path.splitext(ret)
|
||||||
|
if root.upper() in _PostPathFormatter.RESERVED:
|
||||||
|
root += '_'
|
||||||
|
if ext == '.':
|
||||||
|
ext = '\u2024'
|
||||||
|
ret = root + ext
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
||||||
@ -187,6 +201,7 @@ class Instaloader:
|
|||||||
:param slide: :option:`--slide`
|
:param slide: :option:`--slide`
|
||||||
:param fatal_status_codes: :option:`--abort-on`
|
:param fatal_status_codes: :option:`--abort-on`
|
||||||
:param iphone_support: not :option:`--no-iphone`
|
:param iphone_support: not :option:`--no-iphone`
|
||||||
|
:param sanitize_paths: :option:`--sanitize-paths`
|
||||||
|
|
||||||
.. attribute:: context
|
.. attribute:: context
|
||||||
|
|
||||||
@ -216,7 +231,8 @@ class Instaloader:
|
|||||||
slide: Optional[str] = None,
|
slide: Optional[str] = None,
|
||||||
fatal_status_codes: Optional[List[int]] = None,
|
fatal_status_codes: Optional[List[int]] = None,
|
||||||
iphone_support: bool = True,
|
iphone_support: bool = True,
|
||||||
title_pattern: Optional[str] = None):
|
title_pattern: Optional[str] = None,
|
||||||
|
sanitize_paths: bool = False):
|
||||||
|
|
||||||
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
|
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
|
||||||
request_timeout, rate_controller, fatal_status_codes,
|
request_timeout, rate_controller, fatal_status_codes,
|
||||||
@ -233,6 +249,7 @@ class Instaloader:
|
|||||||
self.title_pattern = '{date_utc}_UTC_{typename}'
|
self.title_pattern = '{date_utc}_UTC_{typename}'
|
||||||
else:
|
else:
|
||||||
self.title_pattern = '{target}_{date_utc}_UTC_{typename}'
|
self.title_pattern = '{target}_{date_utc}_UTC_{typename}'
|
||||||
|
self.sanitize_paths = sanitize_paths
|
||||||
self.download_pictures = download_pictures
|
self.download_pictures = download_pictures
|
||||||
self.download_videos = download_videos
|
self.download_videos = download_videos
|
||||||
self.download_video_thumbnails = download_video_thumbnails
|
self.download_video_thumbnails = download_video_thumbnails
|
||||||
@ -296,7 +313,8 @@ class Instaloader:
|
|||||||
check_resume_bbd=self.check_resume_bbd,
|
check_resume_bbd=self.check_resume_bbd,
|
||||||
slide=self.slide,
|
slide=self.slide,
|
||||||
fatal_status_codes=self.context.fatal_status_codes,
|
fatal_status_codes=self.context.fatal_status_codes,
|
||||||
iphone_support=self.context.iphone_support)
|
iphone_support=self.context.iphone_support,
|
||||||
|
sanitize_paths=self.sanitize_paths)
|
||||||
yield new_loader
|
yield new_loader
|
||||||
self.context.error_log.extend(new_loader.context.error_log)
|
self.context.error_log.extend(new_loader.context.error_log)
|
||||||
new_loader.context.error_log = [] # avoid double-printing of errors
|
new_loader.context.error_log = [] # avoid double-printing of errors
|
||||||
@ -506,9 +524,10 @@ class Instaloader:
|
|||||||
pic_bytes = http_response.content
|
pic_bytes = http_response.content
|
||||||
ig_filename = url.split('/')[-1].split('?')[0]
|
ig_filename = url.split('/')[-1].split('?')[0]
|
||||||
pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object)
|
pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object)
|
||||||
dirname = _PostPathFormatter(pic_data).format(self.dirname_pattern, target=target)
|
dirname = _PostPathFormatter(pic_data, self.sanitize_paths).format(self.dirname_pattern, target=target)
|
||||||
filename_template = os.path.join(dirname,
|
filename_template = os.path.join(
|
||||||
_PostPathFormatter(pic_data).format(self.title_pattern, target=target))
|
dirname,
|
||||||
|
_PostPathFormatter(pic_data, self.sanitize_paths).format(self.title_pattern, target=target))
|
||||||
filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg"
|
filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg"
|
||||||
content_length = http_response.headers.get('Content-Length', None)
|
content_length = http_response.headers.get('Content-Length', None)
|
||||||
if os.path.isfile(filename) and (not self.context.is_logged_in or
|
if os.path.isfile(filename) and (not self.context.is_logged_in or
|
||||||
@ -633,7 +652,7 @@ class Instaloader:
|
|||||||
"""Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter.
|
"""Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter.
|
||||||
|
|
||||||
.. versionadded:: 4.1"""
|
.. versionadded:: 4.1"""
|
||||||
return _PostPathFormatter(item).format(self.filename_pattern, target=target)
|
return _PostPathFormatter(item, self.sanitize_paths).format(self.filename_pattern, target=target)
|
||||||
|
|
||||||
def download_post(self, post: Post, target: Union[str, Path]) -> bool:
|
def download_post(self, post: Post, target: Union[str, Path]) -> bool:
|
||||||
"""
|
"""
|
||||||
@ -665,7 +684,7 @@ class Instaloader:
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
|
dirname = _PostPathFormatter(post, self.sanitize_paths).format(self.dirname_pattern, target=target)
|
||||||
filename_template = os.path.join(dirname, self.format_filename(post, target=target))
|
filename_template = os.path.join(dirname, self.format_filename(post, target=target))
|
||||||
filename = self.__prepare_filename(filename_template, lambda: post.url)
|
filename = self.__prepare_filename(filename_template, lambda: post.url)
|
||||||
|
|
||||||
@ -846,7 +865,7 @@ class Instaloader:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
date_local = item.date_local
|
date_local = item.date_local
|
||||||
dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
|
dirname = _PostPathFormatter(item, self.sanitize_paths).format(self.dirname_pattern, target=target)
|
||||||
filename_template = os.path.join(dirname, self.format_filename(item, target=target))
|
filename_template = os.path.join(dirname, self.format_filename(item, target=target))
|
||||||
filename = self.__prepare_filename(filename_template, lambda: item.url)
|
filename = self.__prepare_filename(filename_template, lambda: item.url)
|
||||||
downloaded = False
|
downloaded = False
|
||||||
@ -914,8 +933,9 @@ class Instaloader:
|
|||||||
name = user_highlight.owner_username
|
name = user_highlight.owner_username
|
||||||
highlight_target = (filename_target
|
highlight_target = (filename_target
|
||||||
if filename_target
|
if filename_target
|
||||||
else (Path(_PostPathFormatter.sanitize_path(name)) /
|
else (Path(_PostPathFormatter.sanitize_path(name, self.sanitize_paths)) /
|
||||||
_PostPathFormatter.sanitize_path(user_highlight.title))) # type: Union[str, Path]
|
_PostPathFormatter.sanitize_path(user_highlight.title,
|
||||||
|
self.sanitize_paths))) # type: Union[str, Path]
|
||||||
self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name))
|
self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name))
|
||||||
self.download_highlight_cover(user_highlight, highlight_target)
|
self.download_highlight_cover(user_highlight, highlight_target)
|
||||||
totalcount = user_highlight.itemcount
|
totalcount = user_highlight.itemcount
|
||||||
@ -965,7 +985,7 @@ class Instaloader:
|
|||||||
else total_count)
|
else total_count)
|
||||||
sanitized_target = target
|
sanitized_target = target
|
||||||
if isinstance(target, str):
|
if isinstance(target, str):
|
||||||
sanitized_target = _PostPathFormatter.sanitize_path(target)
|
sanitized_target = _PostPathFormatter.sanitize_path(target, self.sanitize_paths)
|
||||||
if takewhile is None:
|
if takewhile is None:
|
||||||
takewhile = lambda _: True
|
takewhile = lambda _: True
|
||||||
with resumable_iteration(
|
with resumable_iteration(
|
||||||
@ -1209,8 +1229,8 @@ class Instaloader:
|
|||||||
tagged_posts = profile.get_tagged_posts()
|
tagged_posts = profile.get_tagged_posts()
|
||||||
self.posts_download_loop(tagged_posts,
|
self.posts_download_loop(tagged_posts,
|
||||||
target if target
|
target if target
|
||||||
else (Path(_PostPathFormatter.sanitize_path(profile.username)) /
|
else (Path(_PostPathFormatter.sanitize_path(profile.username, self.sanitize_paths)) /
|
||||||
_PostPathFormatter.sanitize_path(':tagged')),
|
_PostPathFormatter.sanitize_path(':tagged', self.sanitize_paths)),
|
||||||
fast_update, post_filter, takewhile=posts_takewhile)
|
fast_update, post_filter, takewhile=posts_takewhile)
|
||||||
if latest_stamps is not None and tagged_posts.first_item is not None:
|
if latest_stamps is not None and tagged_posts.first_item is not None:
|
||||||
latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local)
|
latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local)
|
||||||
|
Loading…
Reference in New Issue
Block a user