Intensive refactoring to satisfy pylint
This commit is contained in:
parent
d7d4d59bab
commit
7c3ca67a20
@ -8,4 +8,4 @@ python:
|
|||||||
install:
|
install:
|
||||||
- pip install pylint requests
|
- pip install pylint requests
|
||||||
script:
|
script:
|
||||||
- python3 -m pylint -r n -d bad-whitespace,bad-continuation,missing-docstring,anomalous-backslash-in-string,invalid-name,multiple-imports,dangerous-default-value,locally-disabled instagram
|
- python3 -m pylint -r n -d bad-whitespace,bad-continuation,missing-docstring,anomalous-backslash-in-string,multiple-imports,dangerous-default-value,locally-disabled instagram
|
||||||
|
151
instagram.py
151
instagram.py
@ -8,23 +8,21 @@ import requests
|
|||||||
class DownloaderException(Exception):
    """Error raised by this module's download functions.

    It is raised, for example, when a file cannot be downloaded, a profile
    picture URL cannot be processed, or a profile has no pictures.
    """
|
||||||
|
|
||||||
quiet = False
|
def log(*msg, sep='', end='\n', flush=False, quiet=False):
    """Print *msg* to standard output unless *quiet* is true.

    Args:
        *msg: Objects to print; forwarded to ``print`` unchanged.
        sep: Separator placed between the objects (empty string by default,
            unlike ``print``'s single space).
        end: String appended after the last object.
        flush: Whether to flush the output stream after printing.
        quiet: When true, nothing is printed at all.
    """
    if quiet:
        return
    print(*msg, sep=sep, end=end, flush=flush)
|
||||||
|
|
||||||
def get_json(name, max_id=0, session=None, sleep_min_max=(1, 5)):
    """Fetch an Instagram page and decode its ``window._sharedData`` JSON.

    Args:
        name: Path appended to ``http://www.instagram.com/``.
        max_id: Value sent as the ``max_id`` query parameter.
        session: Object providing ``get(url, params=...)`` whose result has a
            ``text`` attribute. When None, a session is obtained from
            ``get_session(None, None, True)``.
        sleep_min_max: Two numbers ``(a, b)``. After the request the function
            sleeps for ``abs(b - a) * random() + abs(a)`` seconds. The default
            is an immutable tuple so it cannot be shared and mutated between
            calls.

    Returns:
        The decoded JSON object, or None if the page text contains no
        ``window._sharedData = ...<`` assignment.
    """
    if session is None:
        session = get_session(None, None, True)
    # NOTE(review): this request uses plain http while test_login/get_session
    # use https; left unchanged here -- confirm whether that is intended.
    resp = session.get('http://www.instagram.com/' + name,
                       params={'max_id': max_id})
    low, high = sleep_min_max[0], sleep_min_max[1]
    time.sleep(abs(high - low) * random.random() + abs(low))
    match = re.search(r'window\._sharedData = .*<', resp.text)
    if match is None:
        return None
    # Strip the 21-character 'window._sharedData = ' prefix and the last two
    # matched characters (the '<' required by the pattern and the character
    # before it, presumably the statement's ';').
    return json.loads(match.group(0)[21:-2])
|
||||||
|
|
||||||
def get_last_id(data):
|
def get_last_id(data):
|
||||||
if len(data["entry_data"]) == 0 or \
|
if len(data["entry_data"]) == 0 or \
|
||||||
@ -34,43 +32,43 @@ def get_last_id(data):
|
|||||||
data = data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]
|
data = data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]
|
||||||
return int(data[len(data)-1]["id"])
|
return int(data[len(data)-1]["id"])
|
||||||
|
|
||||||
def epoch_to_string(epoch):
    """Format a POSIX timestamp as ``YYYY-MM-DD_hh-mm-ss``.

    The timestamp is converted with ``datetime.fromtimestamp`` without a
    timezone argument, so the result is expressed in the machine's local time.

    Args:
        epoch: Seconds since the epoch (int or float).

    Returns:
        The formatted date string.
    """
    moment = datetime.datetime.fromtimestamp(epoch)
    return moment.strftime('%Y-%m-%d_%H-%M-%S')
|
||||||
|
|
||||||
def get_file_extension(url):
    """Return the file extension found in *url*.

    The extension is the run of lowercase letters between the first ``.``
    that is followed only by lowercase letters and then a ``?`` (i.e. the
    extension right before a query string). If the URL has no such part, its
    last three characters are returned instead.

    Args:
        url: URL (or file name) to inspect.

    Returns:
        The extension without the leading dot.
    """
    # Raw string: '\.' and '\?' are regex escapes, not string escapes.
    match = re.search(r'\.([a-z]*)\?', url)
    if match is None:
        return url[-3:]
    return match.group(1)
|
||||||
|
|
||||||
def download_pic(name, url, date_epoch, outputlabel=None, quiet=False):
    """Download *url* into the directory ``name.lower()`` unless it is already there.

    The target file is named after ``epoch_to_string(date_epoch)`` plus the
    extension reported by ``get_file_extension(url)``. After a successful
    download the file's access time is set to "now" and its modification
    time to *date_epoch*.

    Args:
        name: Profile name; its lowercase form is used as the directory.
        url: Address of the file to fetch.
        date_epoch: POSIX timestamp used for the file name and the mtime.
        outputlabel: Text logged for this file; defaults to the formatted date.
        quiet: Passed on to ``log`` to suppress output.

    Returns:
        True if the file was actually downloaded (i.e. updated), False if it
        already existed.

    Raises:
        DownloaderException: If the server does not answer with status 200.
    """
    if outputlabel is None:
        outputlabel = epoch_to_string(date_epoch)
    directory = name.lower()
    filename = directory + '/' + epoch_to_string(date_epoch) + '.' + get_file_extension(url)
    if os.path.isfile(filename):
        log(outputlabel + ' exists', end=' ', flush=True, quiet=quiet)
        return False
    resp = get_session(None, None, True).get(url, stream=True)
    try:
        if resp.status_code != 200:
            raise DownloaderException("file \'" + url + "\' could not be downloaded")
        log(outputlabel, end=' ', flush=True, quiet=quiet)
        os.makedirs(directory, exist_ok=True)
        with open(filename, 'wb') as file:
            # Let the raw stream undo any transfer encoding (gzip/deflate)
            # before the bytes are copied to disk.
            resp.raw.decode_content = True
            shutil.copyfileobj(resp.raw, file)
    finally:
        # A streamed response holds its connection until closed; release it
        # on success and on failure alike.
        resp.close()
    os.utime(filename, (datetime.datetime.now().timestamp(), date_epoch))
    return True
|
||||||
|
|
||||||
def saveCaption(name, date_epoch, caption):
|
def save_caption(name, date_epoch, caption, quiet=False):
|
||||||
filename = name.lower() + '/' + epochToString(date_epoch) + '.txt'
|
filename = name.lower() + '/' + epoch_to_string(date_epoch) + '.txt'
|
||||||
if os.path.isfile(filename):
|
if os.path.isfile(filename):
|
||||||
with open(filename, 'r') as f:
|
with open(filename, 'r') as file:
|
||||||
fileCaption = f.read()
|
file_caption = file.read()
|
||||||
if fileCaption == caption:
|
if file_caption == caption:
|
||||||
log('txt unchanged', end=' ', flush=True)
|
log('txt unchanged', end=' ', flush=True, quiet=quiet)
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
def get_filename(index):
|
def get_filename(index):
|
||||||
@ -81,34 +79,34 @@ def saveCaption(name, date_epoch, caption):
|
|||||||
i = i + 1
|
i = i + 1
|
||||||
for index in range(i, 0, -1):
|
for index in range(i, 0, -1):
|
||||||
os.rename(get_filename(index-1), get_filename(index))
|
os.rename(get_filename(index-1), get_filename(index))
|
||||||
log('txt updated', end=' ', flush=True)
|
log('txt updated', end=' ', flush=True, quiet=quiet)
|
||||||
log('txt', end=' ', flush=True)
|
log('txt', end=' ', flush=True, quiet=quiet)
|
||||||
os.makedirs(name.lower(), exist_ok=True)
|
os.makedirs(name.lower(), exist_ok=True)
|
||||||
with open(filename, 'w') as text_file:
|
with open(filename, 'w') as text_file:
|
||||||
text_file.write(caption)
|
text_file.write(caption)
|
||||||
os.utime(filename, (datetime.datetime.now().timestamp(), date_epoch))
|
os.utime(filename, (datetime.datetime.now().timestamp(), date_epoch))
|
||||||
|
|
||||||
def download_profilepic(name, url, quiet=False):
    """Download a profile picture in 2048x2048 size unless it is already stored.

    The picture's ``Last-Modified`` header (queried with a HEAD request)
    determines the file name
    ``<name.lower()>/<date>_UTC_profile_pic.<last three characters of url>``.
    If that file exists nothing is downloaded. Otherwise the URL is rewritten
    to request the ``s2048x2048`` variant and the result is saved.

    Args:
        name: Profile name; its lowercase form is used as the directory.
        url: Profile picture URL as delivered by Instagram.
        quiet: Passed on to ``log`` to suppress output.

    Returns:
        None.

    Raises:
        DownloaderException: If the URL does not have the expected shape or
            the server does not answer with status 200.
    """
    date_object = datetime.datetime.strptime(requests.head(url).headers["Last-Modified"],
                                             '%a, %d %b %Y %H:%M:%S GMT')
    # date_object is naive but holds GMT wall-clock time. Sending it through
    # timestamp() and epoch_to_string() (both local-time based) reproduces the
    # same wall-clock digits, which is why the name is tagged "_UTC_".
    filename = name.lower() + '/' + epoch_to_string(date_object.timestamp()) + \
        '_UTC_profile_pic.' + url[-3:]
    if os.path.isfile(filename):
        log(filename + ' already exists', quiet=quiet)
        return None
    match = re.search(r'http.*://.*instagram.*[^/]*\.(com|net)/[^/]+/.', url)
    if match is None:
        raise DownloaderException("url \'" + url + "\' could not be processed")
    # Position in url of the first character after the second path slash.
    # match.end() keeps this correct even if the match does not start at 0.
    index = match.end() - 1
    # If that character is 's', 8 characters are replaced (presumably an
    # existing size segment such as 's150x150' -- confirm); otherwise a new
    # 's2048x2048/' segment is inserted.
    offset = 8 if match.group(0)[-1:] == 's' else 0
    url = url[:index] + 's2048x2048' + ('/' if offset == 0 else '') + url[index+offset:]
    resp = get_session(None, None, True).get(url, stream=True)
    try:
        if resp.status_code != 200:
            raise DownloaderException("file \'" + url + "\' could not be downloaded")
        log(filename, quiet=quiet)
        os.makedirs(name.lower(), exist_ok=True)
        with open(filename, 'wb') as file:
            resp.raw.decode_content = True
            shutil.copyfileobj(resp.raw, file)
    finally:
        # Release the streamed connection on success and on failure alike.
        resp.close()
    # The header is GMT, so interpret it as UTC for the mtime; a naive
    # timestamp() would treat it as local time and shift the mtime by the
    # machine's UTC offset.
    modified_epoch = date_object.replace(tzinfo=datetime.timezone.utc).timestamp()
    os.utime(filename, (datetime.datetime.now().timestamp(), modified_epoch))
    return None
|
||||||
@ -116,15 +114,15 @@ def download_profilepic(name, url):
|
|||||||
def save_object(obj, filename):
    """Pickle *obj* into *filename* using the highest pickle protocol.

    Args:
        obj: Any picklable object (the caller in this file passes a session).
        filename: Destination path; None selects ``/tmp/instaloader.session``.
    """
    if filename is None:
        # NOTE(review): a fixed name in a world-writable directory is
        # predictable, and the file is created with default permissions --
        # consider a per-user location.
        filename = '/tmp/instaloader.session'
    with open(filename, 'wb') as file:
        # Stream straight into the file instead of building the whole pickle
        # in memory and copying it through a BytesIO buffer.
        pickle.dump(obj, file, pickle.HIGHEST_PROTOCOL)
|
||||||
|
|
||||||
def load_object(filename):
    """Load a pickled object from *filename*.

    Args:
        filename: Path of the pickle file; None selects
            ``/tmp/instaloader.session``.

    Returns:
        The unpickled object, or None if *filename* is not an existing
        regular file.
    """
    if filename is None:
        filename = '/tmp/instaloader.session'
    if not os.path.isfile(filename):
        return None
    with open(filename, 'rb') as sessionfile:
        # SECURITY NOTE(review): unpickling executes code embedded in the
        # file. The default path lives in a world-writable directory, so only
        # load files this user wrote themselves.
        return pickle.load(sessionfile)
|
||||||
@ -132,11 +130,11 @@ def load_object(filename):
|
|||||||
def test_login(user, session):
    """Report whether the Instagram start page fetched with *session* mentions *user*.

    Args:
        user: User name to look for; compared in lowercase.
        session: Object providing ``get(url)`` whose result has a ``text``
            attribute.

    Returns:
        False if *user* or *session* is None. Otherwise True exactly when the
        lowercase user name occurs in the text of
        ``https://www.instagram.com/``. The call sleeps between 1 and 5
        seconds after the request.
    """
    if user is None or session is None:
        return False
    resp = session.get('https://www.instagram.com/')
    time.sleep(4 * random.random() + 1)
    return user.lower() in resp.text
|
||||||
|
|
||||||
def get_session(user, passwd, EmptySessionOnly=False, session=None):
|
def get_session(user, passwd, empty_session_only=False, session=None):
|
||||||
def instaheader():
|
def instaheader():
|
||||||
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
|
||||||
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
|
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
|
||||||
@ -150,7 +148,7 @@ def get_session(user, passwd, EmptySessionOnly=False, session=None):
|
|||||||
'User-Agent' : user_agent, \
|
'User-Agent' : user_agent, \
|
||||||
'X-Instagram-AJAX' : '1', \
|
'X-Instagram-AJAX' : '1', \
|
||||||
'X-Requested-With' : 'XMLHttpRequest'}
|
'X-Requested-With' : 'XMLHttpRequest'}
|
||||||
if EmptySessionOnly:
|
if empty_session_only:
|
||||||
del header['Host']
|
del header['Host']
|
||||||
del header['Origin']
|
del header['Origin']
|
||||||
del header['Referer']
|
del header['Referer']
|
||||||
@ -163,10 +161,10 @@ def get_session(user, passwd, EmptySessionOnly=False, session=None):
|
|||||||
'ig_vw' : '1920', 'csrftoken' : '', \
|
'ig_vw' : '1920', 'csrftoken' : '', \
|
||||||
's_network' : '', 'ds_user_id' : ''})
|
's_network' : '', 'ds_user_id' : ''})
|
||||||
session.headers.update(instaheader())
|
session.headers.update(instaheader())
|
||||||
if EmptySessionOnly:
|
if empty_session_only:
|
||||||
return session
|
return session
|
||||||
r = session.get('https://www.instagram.com/')
|
resp = session.get('https://www.instagram.com/')
|
||||||
session.headers.update({'X-CSRFToken':r.cookies['csrftoken']})
|
session.headers.update({'X-CSRFToken':resp.cookies['csrftoken']})
|
||||||
time.sleep(9 * random.random() + 3)
|
time.sleep(9 * random.random() + 3)
|
||||||
login = session.post('https://www.instagram.com/accounts/login/ajax/', \
|
login = session.post('https://www.instagram.com/accounts/login/ajax/', \
|
||||||
data={'password':passwd,'username':user}, allow_redirects=True)
|
data={'password':passwd,'username':user}, allow_redirects=True)
|
||||||
@ -183,7 +181,8 @@ def get_session(user, passwd, EmptySessionOnly=False, session=None):
|
|||||||
return session, False
|
return session, False
|
||||||
|
|
||||||
def download(name, username = None, password = None, sessionfile = None, \
|
def download(name, username = None, password = None, sessionfile = None, \
|
||||||
ProfilePicOnly = False, DownloadVideos = True, FastUpdate = False, SleepMinMax=[0.25,2]):
|
profile_pic_only = False, download_videos = True, fast_update = False, \
|
||||||
|
sleep_min_max=[0.25,2], quiet=False):
|
||||||
# pylint:disable=too-many-arguments,too-many-locals,too-many-nested-blocks,too-many-branches
|
# pylint:disable=too-many-arguments,too-many-locals,too-many-nested-blocks,too-many-branches
|
||||||
# We are aware that this function has many arguments, many local variables, many nested blocks
|
# We are aware that this function has many arguments, many local variables, many nested blocks
|
||||||
# and many branches. But we don't care.
|
# and many branches. But we don't care.
|
||||||
@ -192,9 +191,10 @@ def download(name, username = None, password = None, sessionfile = None, \
|
|||||||
if len(data["entry_data"]) == 0:
|
if len(data["entry_data"]) == 0:
|
||||||
raise DownloaderException("user does not exist")
|
raise DownloaderException("user does not exist")
|
||||||
else:
|
else:
|
||||||
download_profilepic(name, data["entry_data"]["ProfilePage"][0]["user"]["profile_pic_url"])
|
download_profilepic(name, data["entry_data"]["ProfilePage"][0]["user"]["profile_pic_url"],
|
||||||
time.sleep(abs(SleepMinMax[1]-SleepMinMax[0])*random.random()+abs(SleepMinMax[0]))
|
quiet=quiet)
|
||||||
if not ProfilePicOnly and data["entry_data"]["ProfilePage"][0]["user"]["is_private"]:
|
time.sleep(abs(sleep_min_max[1]-sleep_min_max[0])*random.random()+abs(sleep_min_max[0]))
|
||||||
|
if not profile_pic_only and data["entry_data"]["ProfilePage"][0]["user"]["is_private"]:
|
||||||
if not test_login(username, session):
|
if not test_login(username, session):
|
||||||
if username is None or password is None:
|
if username is None or password is None:
|
||||||
if quiet:
|
if quiet:
|
||||||
@ -218,33 +218,34 @@ def download(name, username = None, password = None, sessionfile = None, \
|
|||||||
data = get_json(name, session=session)
|
data = get_json(name, session=session)
|
||||||
if ("nodes" not in data["entry_data"]["ProfilePage"][0]["user"]["media"] \
|
if ("nodes" not in data["entry_data"]["ProfilePage"][0]["user"]["media"] \
|
||||||
or len(data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]) == 0) \
|
or len(data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]) == 0) \
|
||||||
and not ProfilePicOnly:
|
and not profile_pic_only:
|
||||||
raise DownloaderException("no pics found")
|
raise DownloaderException("no pics found")
|
||||||
totalcount = data["entry_data"]["ProfilePage"][0]["user"]["media"]["count"]
|
totalcount = data["entry_data"]["ProfilePage"][0]["user"]["media"]["count"]
|
||||||
if not ProfilePicOnly:
|
if not profile_pic_only:
|
||||||
count = 1
|
count = 1
|
||||||
while get_last_id(data) is not None:
|
while get_last_id(data) is not None:
|
||||||
for node in data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
|
for node in data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]:
|
||||||
log("[%3i/%3i] " % (count, totalcount), end="", flush=True)
|
log("[%3i/%3i] " % (count, totalcount), end="", flush=True, quiet=quiet)
|
||||||
count = count + 1
|
count = count + 1
|
||||||
downloaded = download_pic(name, node["display_src"], node["date"])
|
downloaded = download_pic(name, node["display_src"], node["date"], quiet=quiet)
|
||||||
time.sleep(abs(SleepMinMax[1]-SleepMinMax[0])*random.random()+abs(SleepMinMax[0]))
|
time.sleep(abs(sleep_min_max[1]-sleep_min_max[0])*random.random() + \
|
||||||
|
abs(sleep_min_max[0]))
|
||||||
if "caption" in node:
|
if "caption" in node:
|
||||||
saveCaption(name, node["date"], node["caption"])
|
save_caption(name, node["date"], node["caption"], quiet=quiet)
|
||||||
if node["is_video"] and DownloadVideos:
|
if node["is_video"] and download_videos:
|
||||||
video_data = get_json('p/' + node["code"], session=session)
|
video_data = get_json('p/' + node["code"], session=session)
|
||||||
download_pic(name, \
|
download_pic(name, \
|
||||||
video_data['entry_data']['PostPage'][0]['media']['video_url'], \
|
video_data['entry_data']['PostPage'][0]['media']['video_url'], \
|
||||||
node["date"], 'mp4')
|
node["date"], 'mp4', quiet=quiet)
|
||||||
log()
|
log(quiet=quiet)
|
||||||
if FastUpdate and not downloaded:
|
if fast_update and not downloaded:
|
||||||
return
|
return
|
||||||
data = get_json(name, get_last_id(data), session)
|
data = get_json(name, get_last_id(data), session)
|
||||||
time.sleep(abs(SleepMinMax[1]-SleepMinMax[0])*random.random()+abs(SleepMinMax[0]))
|
time.sleep(abs(sleep_min_max[1]-sleep_min_max[0])*random.random()+abs(sleep_min_max[0]))
|
||||||
if test_login(username, session):
|
if test_login(username, session):
|
||||||
save_object(session, sessionfile)
|
save_object(session, sessionfile)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def main():
|
||||||
parser = ArgumentParser(description='Simple downloader to fetch all Instagram pics and '\
|
parser = ArgumentParser(description='Simple downloader to fetch all Instagram pics and '\
|
||||||
'captions from a given profile')
|
'captions from a given profile')
|
||||||
parser.add_argument('targets', nargs='+', help='Names of profiles to download')
|
parser.add_argument('targets', nargs='+', help='Names of profiles to download')
|
||||||
@ -266,8 +267,10 @@ if __name__ == "__main__":
|
|||||||
help='Disable user interaction, i.e. do not print messages (except errors) and fail ' \
|
help='Disable user interaction, i.e. do not print messages (except errors) and fail ' \
|
||||||
'if login credentials are needed but not given.')
|
'if login credentials are needed but not given.')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
quiet = args.quiet
|
|
||||||
for target in args.targets:
|
for target in args.targets:
|
||||||
download(target, args.login, args.password, args.sessionfile,
|
download(target, args.login, args.password, args.sessionfile,
|
||||||
args.profile_pic_only, not args.skip_videos, args.fast_update,
|
args.profile_pic_only, not args.skip_videos, args.fast_update,
|
||||||
[0,0] if args.no_sleep else [0.25,2])
|
[0,0] if args.no_sleep else [0.25,2], args.quiet)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user