updated url parsing, fixed login functionality

- Instagram changed its URL style, so the URL parser was updated accordingly
- successfully tested login and download of private profiles
- prevent the password from being echoed at the password prompt
This commit is contained in:
André Koch-Kramer 2016-06-22 14:00:22 +02:00
parent 1c83e324b6
commit e837c8ab5a

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import requests, re, json, datetime, shutil, os, time, random, sys, pickle import requests, re, json, datetime, shutil, os, time, random, sys, pickle, getpass
from io import BytesIO from io import BytesIO
class DownloaderException(Exception): class DownloaderException(Exception):
@ -88,7 +88,9 @@ def download_profilepic(name, url):
if os.path.isfile(filename): if os.path.isfile(filename):
print(filename + ' already exists') print(filename + ' already exists')
return None return None
m = re.search('http.*://.*instagram\.com/[^/]+/.', url) m = re.search('http.*://.*instagram.*[^/]+\.(com|net)/[^/]+/.', url)
if m is None:
raise DownloaderException("url \'" + url + "\' could not be processed")
index = len(m.group(0))-1 index = len(m.group(0))-1
offset = 8 if m.group(0)[-1:] == 's' else 0 offset = 8 if m.group(0)[-1:] == 's' else 0
url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index+offset:] url = url[:index] + 's2048x2048' + ('/' if offset == 0 else str()) + url[index+offset:]
@ -124,7 +126,7 @@ def test_login(user, session):
return False return False
r = session.get('https://www.instagram.com/') r = session.get('https://www.instagram.com/')
time.sleep(4 * random.random() + 1) time.sleep(4 * random.random() + 1)
if r.text.find(user.tolower()) != -1: if r.text.find(user.lower()) != -1:
return True return True
else: else:
return False return False
@ -133,22 +135,29 @@ def get_session(user, passwd, EmptySessionOnly=False, session=None):
def instaheader(): def instaheader():
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \ user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
'(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36' '(KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36'
return {'Accept-Encoding' : 'gzip, deflate', \ header = { 'Accept-Encoding' : 'gzip, deflate', \
'Accept-Language' : 'en-US,en;q=0.8', \ 'Accept-Language' : 'en-US,en;q=0.8', \
'Connection' : 'keep-alive', \ 'Connection' : 'keep-alive', \
'Content-Length' : '0', \ 'Content-Length' : '0', \
'Host' : 'www.instagram.com', \ 'Host' : 'www.instagram.com', \
'Origin' : 'https://www.instagram.com', \ 'Origin' : 'https://www.instagram.com', \
'Referer' : 'https://www.instagram.com/', \ 'Referer' : 'https://www.instagram.com/', \
'User-Agent' : user_agent, \ 'User-Agent' : user_agent, \
'X-Instagram-AJAX' : '1', \ 'X-Instagram-AJAX' : '1', \
'X-Requested-With' : 'XMLHttpRequest'} 'X-Requested-With' : 'XMLHttpRequest'}
if EmptySessionOnly:
del header['Host']
del header['Origin']
del header['Referer']
del header['X-Instagram-AJAX']
del header['X-Requested-With']
return header
if session is None: if session is None:
session = requests.Session() session = requests.Session()
session.cookies.update({'sessionid' : '', 'mid' : '', 'ig_pr' : '1', \ session.cookies.update({'sessionid' : '', 'mid' : '', 'ig_pr' : '1', \
'ig_vw' : '1920', 'csrftoken' : '', \ 'ig_vw' : '1920', 'csrftoken' : '', \
's_network' : '', 'ds_user_id' : ''}) 's_network' : '', 'ds_user_id' : ''})
session.headers.update(instaheader()) session.headers.update(instaheader())
if EmptySessionOnly: if EmptySessionOnly:
return session return session
r = session.get('https://www.instagram.com/') r = session.get('https://www.instagram.com/')
@ -177,22 +186,25 @@ def download(name, username = None, password = None, sessionfile = None, \
else: else:
download_profilepic(name, data["entry_data"]["ProfilePage"][0]["user"]["profile_pic_url"]) download_profilepic(name, data["entry_data"]["ProfilePage"][0]["user"]["profile_pic_url"])
time.sleep((SleepMinMax[1]-SleepMinMax[0])*random.random()+SleepMinMax[0]) time.sleep((SleepMinMax[1]-SleepMinMax[0])*random.random()+SleepMinMax[0])
if data["entry_data"]["ProfilePage"][0]["user"]["is_private"]: if not ProfilePicOnly and data["entry_data"]["ProfilePage"][0]["user"]["is_private"]:
if not test_login(username, session): if not test_login(username, session):
if username is None or password is None: if username is None or password is None:
while True: while True:
if username is None: if username is None:
username = input('Enter your Instagram username to login: ') username = input('Enter your Instagram username to login: ')
if password is None: if password is None:
password = input('Enter your corresponding Instagram password: ') password = getpass.getpass(
prompt='Enter your corresponding Instagram password: ')
session, status = get_session(username, password, session=session) session, status = get_session(username, password, session=session)
if status: if status:
break break
else: else:
session, status = get_session(username, password, session=session) session, status = get_session(username, password, session=session)
if not status: if not status:
raise DownloaderException("aborting...") raise DownloaderException("aborting due to login error")
if len(data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]) == 0 \ data = get_json(name, session=session)
if (not "nodes" in data["entry_data"]["ProfilePage"][0]["user"]["media"] \
or len(data["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]) == 0) \
and not ProfilePicOnly: and not ProfilePicOnly:
raise DownloaderException("no pics found") raise DownloaderException("no pics found")
totalcount = data["entry_data"]["ProfilePage"][0]["user"]["media"]["count"] totalcount = data["entry_data"]["ProfilePage"][0]["user"]["media"]["count"]