Use header to determine file extension (#1447)

This commit is contained in:
fireattack 2022-03-19 09:04:42 -05:00 committed by GitHub
parent d8d268f861
commit 5a42a7537f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -312,15 +312,25 @@ class Instaloader:
filename_suffix: Optional[str] = None, _attempt: int = 1) -> bool: filename_suffix: Optional[str] = None, _attempt: int = 1) -> bool:
"""Downloads and saves picture with given url under given directory with given timestamp. """Downloads and saves picture with given url under given directory with given timestamp.
Returns true, if file was actually downloaded, i.e. updated.""" Returns true, if file was actually downloaded, i.e. updated."""
urlmatch = re.search('\\.[a-z0-9]*\\?', url)
file_extension = url[-3:] if urlmatch is None else urlmatch.group(0)[1:-1]
if filename_suffix is not None: if filename_suffix is not None:
filename += '_' + filename_suffix filename += '_' + filename_suffix
filename += '.' + file_extension urlmatch = re.search('\\.[a-z0-9]*\\?', url)
if os.path.isfile(filename): file_extension = url[-3:] if urlmatch is None else urlmatch.group(0)[1:-1]
nominal_filename = filename + '.' + file_extension
if os.path.isfile(nominal_filename):
self.context.log(nominal_filename + ' exists', end=' ', flush=True)
return False
resp = self.context.get_raw(url)
if 'Content-Type' in resp.headers and resp.headers['Content-Type']:
header_extension = '.' + resp.headers['Content-Type'].split(';')[0].split('/')[-1]
header_extension = header_extension.lower().replace('jpeg', 'jpg')
filename += header_extension
else:
filename = nominal_filename
if filename != nominal_filename and os.path.isfile(filename):
self.context.log(filename + ' exists', end=' ', flush=True) self.context.log(filename + ' exists', end=' ', flush=True)
return False return False
self.context.get_and_write_raw(url, filename) self.context.write_raw(resp, filename)
os.utime(filename, (datetime.now().timestamp(), mtime.timestamp())) os.utime(filename, (datetime.now().timestamp(), mtime.timestamp()))
return True return True