From c45b536be9f902a29b6be7beb217c1ee08af605e Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 24 Jul 2021 19:37:30 +0200 Subject: [PATCH 01/12] Update dependencies --- .github/workflows/lint.yml | 2 +- Pipfile | 2 + Pipfile.lock | 168 +++++++++++++++++++---------------- deploy/windows/create_exe.py | 2 +- instaloader/structures.py | 1 + 5 files changed, 98 insertions(+), 77 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 2938ea0..674d2f1 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -20,7 +20,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install Dependencies run: | - python -m pip install pipenv==2020.11.15 + python -m pip install pipenv==2021.5.29 pipenv sync --dev - name: PyLint run: pipenv run pylint instaloader diff --git a/Pipfile b/Pipfile index 71dab89..696fbae 100644 --- a/Pipfile +++ b/Pipfile @@ -12,6 +12,8 @@ pefile = "*" pywin32-ctypes = "*" psutil = "*" typing-extensions = "*" +types-requests = "*" +typed-ast = "*" [packages] requests = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 2360782..23352df 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9704331c2f9e76af9e5ee386bc359a46ee66d1e2f93aa880d9d148da7ba0e6f5" + "sha256": "4ea11d3dd7ae0070a8745cf21960210a09834ca21f47905fd3fab20ea68c6d3b" }, "pipfile-spec": 6, "requires": {}, @@ -21,35 +21,36 @@ ], "version": "==2021.5.30" }, - "chardet": { + "charset-normalizer": { "hashes": [ - "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", - "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" + "sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b", + "sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3" ], - "version": "==4.0.0" + "markers": "python_version >= '3'", + "version": "==2.0.4" }, "idna": { "hashes": [ - "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", - "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" + "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a", + "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3" ], - "version": "==2.10" + "markers": "python_version >= '3'", + "version": "==3.2" }, "requests": { "hashes": [ - "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", - "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" + "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24", + "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7" ], "index": "pypi", - "version": "==2.25.1" + "version": "==2.26.0" }, "urllib3": { "hashes": [ - "sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c", - "sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098" + "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4", + "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f" ], - "index": "pypi", - "version": "==1.26.5" + "version": "==1.26.6" } }, "develop": { @@ -69,10 +70,10 @@ }, "astroid": { "hashes": [ - "sha256:3c9a2d84354185d13213ff2640ec03d39168dbcd13648abc84fb13ca3b2e2761", - "sha256:d66a600e1602736a0f24f725a511b0e50d12eb18f54b31ec276d2c26a0a62c6a" + "sha256:3975a0bd5373bdce166e60c851cfcbaf21ee96de80ec518c1f4cb3e94c3fb334", + "sha256:ab7f36e8a78b8e54a62028ba6beef7561db4cdb6f2a5009ecc44a6f42b5697ef" ], - "version": "==2.5.7" + "version": "==2.6.6" }, "babel": { "hashes": [ @@ -88,12 +89,13 @@ ], "version": "==2021.5.30" }, - "chardet": { + "charset-normalizer": { "hashes": [ - "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", - "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" + "sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b", + "sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3" ], - "version": "==4.0.0" + "markers": "python_version >= '3'", + "version": "==2.0.4" }, "docutils": { "hashes": [ @@ -110,10 +112,11 @@ }, "idna": { "hashes": [ - "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", - "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" + "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a", + "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3" ], - "version": "==2.10" + "markers": "python_version >= '3'", + "version": "==3.2" }, "imagesize": { "hashes": [ @@ -124,10 +127,10 @@ }, "isort": { "hashes": [ - "sha256:0a943902919f65c5684ac4e0154b1ad4fac6dcaa5d9f3426b732f1c8b5419be6", - "sha256:2bb1680aad211e3c9944dbce1d4ba09a989f04e238296c87fe2139faa26d655d" + "sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899", + "sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2" ], - "version": "==5.8.0" + "version": "==5.9.3" }, "jinja2": { "hashes": [ @@ -211,31 +214,32 @@ }, "mypy": { "hashes": [ - "sha256:0d0a87c0e7e3a9becdfbe936c981d32e5ee0ccda3e0f07e1ef2c3d1a817cf73e", - "sha256:25adde9b862f8f9aac9d2d11971f226bd4c8fbaa89fb76bdadb267ef22d10064", - "sha256:28fb5479c494b1bab244620685e2eb3c3f988d71fd5d64cc753195e8ed53df7c", - "sha256:2f9b3407c58347a452fc0736861593e105139b905cca7d097e413453a1d650b4", - "sha256:33f159443db0829d16f0a8d83d94df3109bb6dd801975fe86bacb9bf71628e97", - "sha256:3f2aca7f68580dc2508289c729bd49ee929a436208d2b2b6aab15745a70a57df", - "sha256:499c798053cdebcaa916eef8cd733e5584b5909f789de856b482cd7d069bdad8", - "sha256:4eec37370483331d13514c3f55f446fc5248d6373e7029a29ecb7b7494851e7a", - "sha256:552a815579aa1e995f39fd05dde6cd378e191b063f031f2acfe73ce9fb7f9e56", - "sha256:5873888fff1c7cf5b71efbe80e0e73153fe9212fafdf8e44adfe4c20ec9f82d7", - "sha256:61a3d5b97955422964be6b3baf05ff2ce7f26f52c85dd88db11d5e03e146a3a6", - "sha256:674e822aa665b9fd75130c6c5f5ed9564a38c6cea6a6432ce47eafb68ee578c5", - "sha256:7ce3175801d0ae5fdfa79b4f0cfed08807af4d075b402b7e294e6aa72af9aa2a", - "sha256:9743c91088d396c1a5a3c9978354b61b0382b4e3c440ce83cf77994a43e8c521", - "sha256:9f94aac67a2045ec719ffe6111df543bac7874cee01f41928f6969756e030564", - "sha256:a26f8ec704e5a7423c8824d425086705e381b4f1dfdef6e3a1edab7ba174ec49", - "sha256:abf7e0c3cf117c44d9285cc6128856106183938c68fd4944763003decdcfeb66", - "sha256:b09669bcda124e83708f34a94606e01b614fa71931d356c1f1a5297ba11f110a", - "sha256:cd07039aa5df222037005b08fbbfd69b3ab0b0bd7a07d7906de75ae52c4e3119", - "sha256:d23e0ea196702d918b60c8288561e722bf437d82cb7ef2edcd98cfa38905d506", - "sha256:d65cc1df038ef55a99e617431f0553cd77763869eebdf9042403e16089fe746c", - "sha256:d7da2e1d5f558c37d6e8c1246f1aec1e7349e4913d8fb3cb289a35de573fe2eb" + "sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9", + "sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a", + "sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9", + "sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e", + "sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2", + "sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212", + "sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b", + "sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885", + "sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150", + "sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703", + "sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072", + "sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457", + "sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e", + "sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0", + "sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb", + "sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97", + "sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8", + "sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811", + "sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6", + "sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de", + "sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504", + "sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921", + "sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d" ], "index": "pypi", - "version": "==0.812" + "version": "==0.910" }, "mypy-extensions": { "hashes": [ @@ -246,17 +250,17 @@ }, "packaging": { "hashes": [ - "sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5", - "sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a" + "sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7", + "sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14" ], - "version": "==20.9" + "version": "==21.0" }, "pefile": { "hashes": [ - "sha256:a5d6e8305c6b210849b47a6174ddf9c452b2888340b8177874b862ba6c207645" + "sha256:ed79b2353daa58421459abf4d685953bde0adf9f6e188944f97ba9795f100246" ], "index": "pypi", - "version": "==2019.4.18" + "version": "==2021.5.24" }, "psutil": { "hashes": [ @@ -301,25 +305,31 @@ }, "pyinstaller": { "hashes": [ - "sha256:f5c0eeb2aa663cce9a5404292c0195011fa500a6501c873a466b2e8cad3c950c" + "sha256:30733baaf8971902286a0ddf77e5499ac5f7bf8e7c39163e83d4f8c696ef265e", + "sha256:4d848cd782ee0893d7ad9fe2bfe535206a79f0b6760cecc5f2add831258b9322", + "sha256:8f747b190e6ad30e2d2fd5da9a64636f61aac8c038c0b7f685efa92c782ea14f", + "sha256:aae456205c68355f9597411090576bb31b614e53976b4c102d072bbe5db8392a", + "sha256:c587da8f521a7ce1b9efb4e3d0117cd63c92dc6cedff24590aeef89372f53012", + "sha256:ecc2baadeeefd2b6fbf39d13c65d4aa603afdda1c6aaaebc4577ba72893fee9e", + "sha256:fed9f5e4802769a416a8f2ca171c6be961d1861cc05a0b71d20dfe05423137e9" ], "index": "pypi", - "version": "==4.2" + "version": "==4.5.1" }, "pyinstaller-hooks-contrib": { "hashes": [ - "sha256:27558072021857d89524c42136feaa2ffe4f003f1bdf0278f9b24f6902c1759c", - "sha256:892310e6363655838485ee748bf1c5e5cade7963686d9af8650ee218a3e0b031" + "sha256:57964f93eb69255c49159ffdf052aae893feed223b0f69773dfd010ca6c569d9", + "sha256:7f5d0689b30da3092149fc536a835a94045ac8c9f0e6dfb23ac171890f5ea8f2" ], - "version": "==2021.1" + "version": "==2021.2" }, "pylint": { "hashes": [ - "sha256:0e21d3b80b96740909d77206d741aa3ce0b06b41be375d92e1f3244a274c1f8a", - "sha256:d09b0b07ba06bcdff463958f53f23df25e740ecd81895f7d2699ec04bbd8dc3b" + "sha256:2e1a0eb2e8ab41d6b5dbada87f066492bb1557b12b76c47c2ee8aa8a11186594", + "sha256:8b838c8983ee1904b2de66cce9d0b96649a91901350e956d78f289c3bc87b48e" ], "index": "pypi", - "version": "==2.7.2" + "version": "==2.9.6" }, "pyparsing": { "hashes": [ @@ -345,11 +355,11 @@ }, "requests": { "hashes": [ - "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", - "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" + "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24", + "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7" ], "index": "pypi", - "version": "==2.25.1" + "version": "==2.26.0" }, "snowballstemmer": { "hashes": [ @@ -448,24 +458,32 @@ "sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f", "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65" ], + "index": "pypi", "version": "==1.4.3" }, + "types-requests": { + "hashes": [ + "sha256:a5a305b43ea57bf64d6731f89816946a405b591eff6de28d4c0fd58422cee779", + "sha256:e21541c0f55c066c491a639309159556dd8c5833e49fcde929c4c47bdb0002ee" + ], + "index": "pypi", + "version": "==2.25.6" + }, "typing-extensions": { "hashes": [ - "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918", - "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c", - "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f" + "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497", + "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342", + "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84" ], "index": "pypi", - "version": "==3.7.4.3" + "version": "==3.10.0.0" }, "urllib3": { "hashes": [ - "sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c", - "sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098" + "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4", + "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f" ], - "index": "pypi", - "version": "==1.26.5" + "version": "==1.26.6" }, "wrapt": { "hashes": [ diff --git a/deploy/windows/create_exe.py b/deploy/windows/create_exe.py index 002ca10..059c2a5 100644 --- a/deploy/windows/create_exe.py +++ b/deploy/windows/create_exe.py @@ -44,7 +44,7 @@ with open('__main__.py', 'w+') as f: f.writelines(lines) # install dependencies and invoke PyInstaller -commands = ["pip install pipenv==2020.11.15", +commands = ["pip install pipenv==2021.5.29", "pipenv sync --dev", "pipenv run pyinstaller --log-level=DEBUG instaloader.spec"] diff --git a/instaloader/structures.py b/instaloader/structures.py index 726fe61..7c6850a 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -1707,6 +1707,7 @@ def load_structure_from_file(context: InstaloaderContext, filename: str) -> Json if compressed: fp = lzma.open(filename, 'rt') else: + # pylint:disable=consider-using-with fp = open(filename, 'rt') json_structure = json.load(fp) fp.close() From 9eb2339028e8fb7cb9c44092bc81827aa4819947 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Tue, 10 Aug 2021 09:24:41 +0200 Subject: [PATCH 02/12] Update default user agent string --- docs/cli-options.rst | 2 +- instaloader/instaloadercontext.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 037b19b..9d5b0cd 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -273,7 +273,7 @@ How to Download .. option:: --user-agent USER_AGENT User Agent to use for HTTP requests. Per default, Instaloader pretends being - Chrome/89 on Linux. + Chrome/92 on Linux. .. option:: --max-connection-attempts N diff --git a/instaloader/instaloadercontext.py b/instaloader/instaloadercontext.py index 39f4361..5b3238e 100644 --- a/instaloader/instaloadercontext.py +++ b/instaloader/instaloadercontext.py @@ -33,7 +33,7 @@ def copy_session(session: requests.Session, request_timeout: Optional[float] = N def default_user_agent() -> str: return 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \ - '(KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36' + '(KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36' class InstaloaderContext: From 7a32272b55c7eea1fc7a65b5119909de1f22a7cf Mon Sep 17 00:00:00 2001 From: Arman Yeghiazaryan Date: Sat, 14 Aug 2021 18:27:14 +0400 Subject: [PATCH 03/12] Fix the issue with folder names starting with dots (#1252) --- instaloader/instaloader.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 5e4c2d2..d64d8be 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -146,6 +146,10 @@ class _PostPathFormatter(_ArbitraryItemFormatter): def sanitize_path(ret: str) -> str: """Replaces '/' with similar looking Division Slash and some other illegal filename characters on Windows.""" ret = ret.replace('/', '\u2215') + + if ret.startswith('.'): + ret = ret.replace('.', '\u2024', 1) + if platform.system() == 'Windows': ret = ret.replace(':', '\uff1a').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02') ret = ret.replace('\\', '\ufe68').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a') From 5d1885769578c231c77d5779f49bd43f35283810 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Fri, 12 Nov 2021 20:17:24 +0100 Subject: [PATCH 04/12] Fix download of hashtags and locations Fixes #1080, fixes #1129, closes #1240. --- instaloader/instaloader.py | 19 ++--- instaloader/sectioniterator.py | 46 +++++++++++ instaloader/structures.py | 138 ++++++++++++++++++++++++++------- 3 files changed, 165 insertions(+), 38 deletions(-) create mode 100644 instaloader/sectioniterator.py diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index d64d8be..d7d74fe 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -22,6 +22,7 @@ from .exceptions import * from .instaloadercontext import InstaloaderContext, RateController from .lateststamps import LatestStamps from .nodeiterator import NodeIterator, resumable_iteration +from .sectioniterator import SectionIterator from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem, load_structure_from_file, save_structure_to_file, PostSidecarNode, TitlePic) @@ -1088,18 +1089,12 @@ class Instaloader: .. versionchanged:: 4.2.9 Require being logged in (as required by Instagram) """ - has_next_page = True - end_cursor = None - while has_next_page: - if end_cursor: - params = {'__a': 1, 'max_id': end_cursor} - else: - params = {'__a': 1} - location_data = self.context.get_json('explore/locations/{0}/'.format(location), - params)['graphql']['location']['edge_location_to_media'] - yield from (Post(self.context, edge['node']) for edge in location_data['edges']) - has_next_page = location_data['page_info']['has_next_page'] - end_cursor = location_data['page_info']['end_cursor'] + yield from SectionIterator( + self.context, + lambda d: d["native_location_data"]["recent"], + lambda m: Post.from_iphone_struct(self.context, m), + f"explore/locations/{location}/", + ) @_requires_login def download_location(self, location: str, diff --git a/instaloader/sectioniterator.py b/instaloader/sectioniterator.py new file mode 100644 index 0000000..77816f2 --- /dev/null +++ b/instaloader/sectioniterator.py @@ -0,0 +1,46 @@ +from typing import Any, Callable, Dict, Iterator, Optional, TypeVar + +from .instaloadercontext import InstaloaderContext + +T = TypeVar('T') + + +class SectionIterator(Iterator[T]): + """Iterator for the new 'sections'-style responses. + + .. versionadded:: 4.9""" + def __init__(self, + context: InstaloaderContext, + sections_extractor: Callable[[Dict[str, Any]], Dict[str, Any]], + media_wrapper: Callable[[Dict], T], + query_path: str, + first_data: Optional[Dict[str, Any]] = None): + self._context = context + self._sections_extractor = sections_extractor + self._media_wrapper = media_wrapper + self._query_path = query_path + self._data = first_data or self._query() + self._page_index = 0 + self._section_index = 0 + + def __iter__(self): + return self + + def _query(self, max_id: Optional[str] = None) -> Dict[str, Any]: + pagination_variables = {"max_id": max_id} if max_id is not None else {} + return self._sections_extractor( + self._context.get_json(self._query_path, params={"__a": 1, **pagination_variables}) + ) + + def __next__(self) -> T: + if self._page_index < len(self._data['sections']): + media = self._data['sections'][self._page_index]['layout_content']['medias'][self._section_index]['media'] + self._section_index += 1 + if self._section_index >= len(self._data['sections'][self._page_index]['layout_content']['medias']): + self._section_index = 0 + self._page_index += 1 + return self._media_wrapper(media) + if self._data['more_available']: + self._page_index, self._section_index, self._data = 0, 0, self._query(self._data["next_max_id"]) + return self.__next__() + raise StopIteration() diff --git a/instaloader/structures.py b/instaloader/structures.py index ece61fb..2959f76 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -3,7 +3,9 @@ import lzma import re from base64 import b64decode, b64encode from collections import namedtuple +from contextlib import suppress from datetime import datetime +from itertools import islice from pathlib import Path from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union @@ -11,6 +13,7 @@ from . import __version__ from .exceptions import * from .instaloadercontext import InstaloaderContext from .nodeiterator import FrozenNodeIterator, NodeIterator +from .sectioniterator import SectionIterator PostSidecarNode = namedtuple('PostSidecarNode', ['is_video', 'display_url', 'video_url']) PostSidecarNode.__doc__ = "Item of a Sidecar Post." @@ -89,6 +92,41 @@ class Post: """Create a post object from a given mediaid""" return cls.from_shortcode(context, Post.mediaid_to_shortcode(mediaid)) + @classmethod + def from_iphone_struct(cls, context: InstaloaderContext, media: Dict[str, Any]): + """Create a post from a given iphone_struct. + + .. versionadded:: 4.9""" + media_types = { + 1: "GraphImage", + 2: "GraphVideo", + 8: "GraphSidecar", + } + fake_node = { + "shortcode": media["code"], + "id": media["pk"], + "__typename": media_types[media["media_type"]], + "is_video": media_types[media["media_type"]] == "GraphVideo", + "date": media["taken_at"], + "caption": media["caption"].get("text") if media.get("caption") is not None else None, + "title": media.get("title"), + "viewer_has_liked": media["has_liked"], + "edge_media_preview_like": {"count": media["like_count"]}, + "iphone_struct": media, + } + with suppress(KeyError): + fake_node["display_url"] = media['image_versions2']['candidates'][0]['url'] + with suppress(KeyError): + fake_node["video_url"] = media['video_versions'][-1]['url'] + fake_node["video_duration"] = media["video_duration"] + fake_node["video_view_count"] = media["view_count"] + with suppress(KeyError): + fake_node["edge_sidecar_to_children"] = {"edges": [{"node": { + "display_url": node['image_versions2']['candidates'][0]['url'], + "is_video": media_types[node["media_type"]] == "GraphVideo", + }} for node in media["carousel_media"]]} + return cls(context, fake_node, Profile.from_iphone_struct(context, media["user"]) if "user" in media else None) + @staticmethod def shortcode_to_mediaid(code: str) -> int: if len(code) > 11: @@ -665,6 +703,20 @@ class Profile: context.profile_id_cache[profile_id] = profile return profile + @classmethod + def from_iphone_struct(cls, context: InstaloaderContext, media: Dict[str, Any]): + """Create a profile from a given iphone_struct. + + .. versionadded:: 4.9""" + return cls(context, { + "id": media["pk"], + "username": media["username"], + "is_private": media["is_private"], + "full_name": media["full_name"], + "profile_pic_url_hd": media["profile_pic_url"], + "iphone_struct": media, + }) + @classmethod def own_profile(cls, context: InstaloaderContext): """Return own profile if logged-in. @@ -1359,6 +1411,9 @@ class Hashtag: L.download_post(post, target="#"+hashtag.name) Also, this class implements == and is hashable. + + .. versionchanged:: 4.9 + Removed ``get_related_tags()`` and ``is_top_media_only`` as these features were removed from Instagram. """ def __init__(self, context: InstaloaderContext, node: Dict[str, Any]): assert "name" in node @@ -1387,8 +1442,8 @@ class Hashtag: return self._node["name"].lower() def _query(self, params): - return self._context.get_json("explore/tags/{0}/".format(self.name), - params)["graphql"]["hashtag"] + json_response = self._context.get_json("explore/tags/{0}/".format(self.name), params) + return json_response["graphql"]["hashtag"] if "graphql" in json_response else json_response["data"] def _obtain_metadata(self): if not self._has_full_metadata: @@ -1399,7 +1454,9 @@ class Hashtag: json_node = self._node.copy() # remove posts json_node.pop("edge_hashtag_to_top_posts", None) + json_node.pop("top", None) json_node.pop("edge_hashtag_to_media", None) + json_node.pop("recent", None) return json_node def __repr__(self): @@ -1435,30 +1492,33 @@ class Hashtag: return self._metadata("profile_pic_url") @property - def description(self) -> str: + def description(self) -> Optional[str]: return self._metadata("description") @property def allow_following(self) -> bool: - return self._metadata("allow_following") + return bool(self._metadata("allow_following")) @property def is_following(self) -> bool: - return self._metadata("is_following") - - @property - def is_top_media_only(self) -> bool: - return self._metadata("is_top_media_only") - - def get_related_tags(self) -> Iterator["Hashtag"]: - """Yields similar hashtags.""" - yield from (Hashtag(self._context, edge["node"]) - for edge in self._metadata("edge_hashtag_to_related_tags", "edges")) + try: + return self._metadata("is_following") + except KeyError: + return bool(self._metadata("following")) def get_top_posts(self) -> Iterator[Post]: """Yields the top posts of the hashtag.""" - yield from (Post(self._context, edge["node"]) - for edge in self._metadata("edge_hashtag_to_top_posts", "edges")) + try: + yield from (Post(self._context, edge["node"]) + for edge in self._metadata("edge_hashtag_to_top_posts", "edges")) + except KeyError: + yield from SectionIterator( + self._context, + lambda d: d["data"]["top"], + lambda m: Post.from_iphone_struct(self._context, m), + f"explore/tags/{self.name}/", + self._metadata("top"), + ) @property def mediacount(self) -> int: @@ -1468,22 +1528,34 @@ class Hashtag: The number of posts with a certain hashtag may differ from the number of posts that can actually be accessed, as the hashtag count might include private posts """ - return self._metadata("edge_hashtag_to_media", "count") + try: + return self._metadata("edge_hashtag_to_media", "count") + except KeyError: + return self._metadata("media_count") def get_posts(self) -> Iterator[Post]: - """Yields the posts associated with this hashtag.""" - self._metadata("edge_hashtag_to_media", "edges") - self._metadata("edge_hashtag_to_media", "page_info") - conn = self._metadata("edge_hashtag_to_media") - yield from (Post(self._context, edge["node"]) for edge in conn["edges"]) - while conn["page_info"]["has_next_page"]: - data = self._query({'__a': 1, 'max_id': conn["page_info"]["end_cursor"]}) - conn = data["edge_hashtag_to_media"] + """Yields the recent posts associated with this hashtag.""" + try: + self._metadata("edge_hashtag_to_media", "edges") + self._metadata("edge_hashtag_to_media", "page_info") + conn = self._metadata("edge_hashtag_to_media") yield from (Post(self._context, edge["node"]) for edge in conn["edges"]) + while conn["page_info"]["has_next_page"]: + data = self._query({'__a': 1, 'max_id': conn["page_info"]["end_cursor"]}) + conn = data["edge_hashtag_to_media"] + yield from (Post(self._context, edge["node"]) for edge in conn["edges"]) + except KeyError: + yield from SectionIterator( + self._context, + lambda d: d["data"]["recent"], + lambda m: Post.from_iphone_struct(self._context, m), + f"explore/tags/{self.name}/", + self._metadata("recent"), + ) def get_all_posts(self) -> Iterator[Post]: """Yields all posts, i.e. all most recent posts and the top posts, in almost-chronological order.""" - sorted_top_posts = iter(sorted(self.get_top_posts(), key=lambda p: p.date_utc, reverse=True)) + sorted_top_posts = iter(sorted(islice(self.get_top_posts(), 9), key=lambda p: p.date_utc, reverse=True)) other_posts = self.get_posts() next_top = next(sorted_top_posts, None) next_other = next(other_posts, None) @@ -1510,6 +1582,20 @@ class Hashtag: yield next_other next_other = next(other_posts, None) + def get_posts_resumable(self) -> NodeIterator[Post]: + """Get the recent posts of the hashtag in a resumable fashion. + + :rtype: NodeIterator[Post] + + .. versionadded:: 4.9""" + return NodeIterator( + self._context, "9b498c08113f1e09617a1703c22b2f32", + lambda d: d['data']['hashtag']['edge_hashtag_to_media'], + lambda n: Post(self._context, n), + {'tag_name': self.name}, + f"https://www.instagram.com/explore/tags/{self.name}/" + ) + class TopSearchResults: """ From a23b832a694a63d49a0e0ca646a282a333c9d19f Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Fri, 12 Nov 2021 21:41:40 +0100 Subject: [PATCH 05/12] Add Post.accessibility_caption property --- instaloader/structures.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/instaloader/structures.py b/instaloader/structures.py index 2959f76..5b981a3 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -397,6 +397,16 @@ class Post: return (pcaption[:30] + u"\u2026") if len(pcaption) > 31 else pcaption return _elliptify(self.caption) if self.caption else '' + @property + def accessibility_caption(self) -> Optional[str]: + """Accessibility caption of the post, if available. + + .. versionadded:: 4.9""" + try: + return self._field("accessibility_caption") + except KeyError: + return None + @property def tagged_users(self) -> List[str]: """List of all lowercased users that are tagged in the Post.""" From 5bff9b547466f5528814585f8b63edfb88213de0 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Fri, 12 Nov 2021 21:44:08 +0100 Subject: [PATCH 06/12] First Beta Release for Version 4.9 --- instaloader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index f3b81a8..cbc1dc9 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.8.1' +__version__ = '4.9b1' try: From 2d4f3b8537cfcb33015ec2e3db8ee8114869f745 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 15 Jan 2022 17:29:28 +0100 Subject: [PATCH 07/12] Make {Post,StoryItem}.date_local timezone aware This commit is made up of 21365ec6688b55c91803040693c3b67cc8820cb8 555c86633c61b106c5c3201e1d5f030a04a4f801 d864ce08ff43a2d86ac6123459bb9fa32b924657 Discussed in #1305, #1316, #1372. Co-Authored-By: Misael Co-Authored-By: Eduardo Kalinowski --- instaloader/instaloader.py | 14 +++++++------- instaloader/structures.py | 26 +++++++++++++++++--------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index af5814d..105586f 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -816,7 +816,7 @@ class Instaloader: last_scraped = latest_stamps.get_last_story_timestamp(name) scraped_timestamp = datetime.now().astimezone() for item in user_story.get_items(): - if latest_stamps is not None and item.date_utc.replace(tzinfo=timezone.utc) <= last_scraped: + if latest_stamps is not None and item.date_local <= last_scraped: break if storyitem_filter is not None and not storyitem_filter(item): self.context.log("<{} skipped>".format(item), flush=True) @@ -1205,7 +1205,7 @@ class Instaloader: posts_takewhile: Optional[Callable[[Post], bool]] = None if latest_stamps is not None: last_scraped = latest_stamps.get_last_tagged_timestamp(profile.username) - posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped + posts_takewhile = lambda p: p.date_local > last_scraped tagged_posts = profile.get_tagged_posts() self.posts_download_loop(tagged_posts, target if target @@ -1213,7 +1213,7 @@ class Instaloader: _PostPathFormatter.sanitize_path(':tagged')), fast_update, post_filter, takewhile=posts_takewhile) if latest_stamps is not None and tagged_posts.first_item is not None: - latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local.astimezone()) + latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local) def download_igtv(self, profile: Profile, fast_update: bool = False, post_filter: Optional[Callable[[Post], bool]] = None, @@ -1228,12 +1228,12 @@ class Instaloader: posts_takewhile: Optional[Callable[[Post], bool]] = None if latest_stamps is not None: last_scraped = latest_stamps.get_last_igtv_timestamp(profile.username) - posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped + posts_takewhile = lambda p: p.date_local > last_scraped igtv_posts = profile.get_igtv_posts() self.posts_download_loop(igtv_posts, profile.username, fast_update, post_filter, total_count=profile.igtvcount, owner_profile=profile, takewhile=posts_takewhile) if latest_stamps is not None and igtv_posts.first_item is not None: - latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local.astimezone()) + latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local) def _get_id_filename(self, profile_name: str) -> str: if ((format_string_contains_key(self.dirname_pattern, 'profile') or @@ -1426,14 +1426,14 @@ class Instaloader: if latest_stamps is not None: # pylint:disable=cell-var-from-loop last_scraped = latest_stamps.get_last_post_timestamp(profile_name) - posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped + posts_takewhile = lambda p: p.date_local > last_scraped posts_to_download = profile.get_posts() self.posts_download_loop(posts_to_download, profile_name, fast_update, post_filter, total_count=profile.mediacount, owner_profile=profile, takewhile=posts_takewhile) if latest_stamps is not None and posts_to_download.first_item is not None: latest_stamps.set_last_post_timestamp(profile_name, - posts_to_download.first_item.date_local.astimezone()) + posts_to_download.first_item.date_local) if stories and profiles: with self.context.error_catcher("Download stories"): diff --git a/instaloader/structures.py b/instaloader/structures.py index d244e55..c4c51c3 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -262,17 +262,16 @@ class Post: @property def date_local(self) -> datetime: - """Timestamp when the post was created (local time zone).""" - return datetime.fromtimestamp(self._node["date"] - if "date" in self._node - else self._node["taken_at_timestamp"]) + """Timestamp when the post was created (local time zone). + + .. versionchanged:: 4.9 + Return timezone aware datetime object.""" + return datetime.fromtimestamp(self._get_timestamp_date_created()).astimezone() @property def date_utc(self) -> datetime: """Timestamp when the post was created (UTC).""" - return datetime.utcfromtimestamp(self._node["date"] - if "date" in self._node - else self._node["taken_at_timestamp"]) + return datetime.utcfromtimestamp(self._get_timestamp_date_created()) @property def date(self) -> datetime: @@ -313,6 +312,12 @@ class Post: return len(edges) return 1 + def _get_timestamp_date_created(self) -> float: + """Timestamp when the post was created""" + return (self._node["date"] + if "date" in self._node + else self._node["taken_at_timestamp"]) + def get_is_videos(self) -> List[bool]: """ Return a list containing the ``is_video`` property for each media in the post. @@ -1140,8 +1145,11 @@ class StoryItem: @property def date_local(self) -> datetime: - """Timestamp when the StoryItem was created (local time zone).""" - return datetime.fromtimestamp(self._node['taken_at_timestamp']) + """Timestamp when the StoryItem was created (local time zone). + + .. versionchanged:: 4.9 + Return timezone aware datetime object.""" + return datetime.fromtimestamp(self._node['taken_at_timestamp']).astimezone() @property def date_utc(self) -> datetime: From 37a93ee59abb3dc10595b3e8df944775a6d64ead Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 15 Jan 2022 17:31:33 +0100 Subject: [PATCH 08/12] Second Beta Release for Version 4.9 --- instaloader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index cbc1dc9..767ac6f 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.9b1' +__version__ = '4.9b2' try: From 604b107586f94f6f44c130a71e8990a30bc47678 Mon Sep 17 00:00:00 2001 From: canh Date: Thu, 17 Mar 2022 15:27:36 +0100 Subject: [PATCH 09/12] Add --sanitize-paths option (#1452) --- docs/cli-options.rst | 7 +++++ instaloader/__main__.py | 6 ++++- instaloader/instaloader.py | 52 ++++++++++++++++++++++++++------------ 3 files changed, 48 insertions(+), 17 deletions(-) diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 9d5b0cd..88b4408 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -239,6 +239,13 @@ How to Download .. versionadded:: 4.8 +.. option:: --sanitize-paths + + Force sanitization of paths so that the resulting file and directory names + are valid on both Windows and Unix. + + .. versionadded:: 4.9 + .. option:: --resume-prefix prefix For many targets, Instaloader is capable of resuming a previously-aborted diff --git a/instaloader/__main__.py b/instaloader/__main__.py index 6169916..97da328 100644 --- a/instaloader/__main__.py +++ b/instaloader/__main__.py @@ -383,6 +383,9 @@ def main(): g_how.add_argument('--resume-prefix', metavar='PREFIX', help='Prefix for filenames that are used to save the information to resume an interrupted ' 'download.') + g_how.add_argument('--sanitize-paths', action='store_true', + help='Sanitize paths so that the resulting file and directory names are valid on both ' + 'Windows and Unix.') g_how.add_argument('--no-resume', action='store_true', help='Do not resume a previously-aborted download iteration, and do not save such information ' 'when interrupted.') @@ -463,7 +466,8 @@ def main(): slide=args.slide, fatal_status_codes=args.abort_on, iphone_support=not args.no_iphone, - title_pattern=args.title_pattern) + title_pattern=args.title_pattern, + sanitize_paths=args.sanitize_paths) _main(loader, args.profile, username=args.login.lower() if args.login is not None else None, diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 105586f..b23d96c 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -137,24 +137,38 @@ class _ArbitraryItemFormatter(string.Formatter): class _PostPathFormatter(_ArbitraryItemFormatter): + RESERVED: set = {'CON', 'PRN', 'AUX', 'NUL', + 'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9', + 'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'} + + def __init__(self, item: Any, force_windows_path: bool = False): + super().__init__(item) + self.force_windows_path = force_windows_path + def get_value(self, key, args, kwargs): ret = super().get_value(key, args, kwargs) if not isinstance(ret, str): return ret - return self.sanitize_path(ret) + return self.sanitize_path(ret, self.force_windows_path) @staticmethod - def sanitize_path(ret: str) -> str: + def sanitize_path(ret: str, force_windows_path: bool = False) -> str: """Replaces '/' with similar looking Division Slash and some other illegal filename characters on Windows.""" ret = ret.replace('/', '\u2215') if ret.startswith('.'): ret = ret.replace('.', '\u2024', 1) - if platform.system() == 'Windows': + if force_windows_path or platform.system() == 'Windows': ret = ret.replace(':', '\uff1a').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02') ret = ret.replace('\\', '\ufe68').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a') ret = ret.replace('\n', ' ').replace('\r', ' ') + root, ext = os.path.splitext(ret) + if root.upper() in _PostPathFormatter.RESERVED: + root += '_' + if ext == '.': + ext = '\u2024' + ret = root + ext return ret @@ -187,6 +201,7 @@ class Instaloader: :param slide: :option:`--slide` :param fatal_status_codes: :option:`--abort-on` :param iphone_support: not :option:`--no-iphone` + :param sanitize_paths: :option:`--sanitize-paths` .. attribute:: context @@ -216,7 +231,8 @@ class Instaloader: slide: Optional[str] = None, fatal_status_codes: Optional[List[int]] = None, iphone_support: bool = True, - title_pattern: Optional[str] = None): + title_pattern: Optional[str] = None, + sanitize_paths: bool = False): self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts, request_timeout, rate_controller, fatal_status_codes, @@ -233,6 +249,7 @@ class Instaloader: self.title_pattern = '{date_utc}_UTC_{typename}' else: self.title_pattern = '{target}_{date_utc}_UTC_{typename}' + self.sanitize_paths = sanitize_paths self.download_pictures = download_pictures self.download_videos = download_videos self.download_video_thumbnails = download_video_thumbnails @@ -296,7 +313,8 @@ class Instaloader: check_resume_bbd=self.check_resume_bbd, slide=self.slide, fatal_status_codes=self.context.fatal_status_codes, - iphone_support=self.context.iphone_support) + iphone_support=self.context.iphone_support, + sanitize_paths=self.sanitize_paths) yield new_loader self.context.error_log.extend(new_loader.context.error_log) new_loader.context.error_log = [] # avoid double-printing of errors @@ -506,9 +524,10 @@ class Instaloader: pic_bytes = http_response.content ig_filename = url.split('/')[-1].split('?')[0] pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object) - dirname = _PostPathFormatter(pic_data).format(self.dirname_pattern, target=target) - filename_template = os.path.join(dirname, - _PostPathFormatter(pic_data).format(self.title_pattern, target=target)) + dirname = _PostPathFormatter(pic_data, self.sanitize_paths).format(self.dirname_pattern, target=target) + filename_template = os.path.join( + dirname, + _PostPathFormatter(pic_data, self.sanitize_paths).format(self.title_pattern, target=target)) filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg" content_length = http_response.headers.get('Content-Length', None) if os.path.isfile(filename) and (not self.context.is_logged_in or @@ -633,7 +652,7 @@ class Instaloader: """Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter. .. versionadded:: 4.1""" - return _PostPathFormatter(item).format(self.filename_pattern, target=target) + return _PostPathFormatter(item, self.sanitize_paths).format(self.filename_pattern, target=target) def download_post(self, post: Post, target: Union[str, Path]) -> bool: """ @@ -665,7 +684,7 @@ class Instaloader: return False return True - dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target) + dirname = _PostPathFormatter(post, self.sanitize_paths).format(self.dirname_pattern, target=target) filename_template = os.path.join(dirname, self.format_filename(post, target=target)) filename = self.__prepare_filename(filename_template, lambda: post.url) @@ -846,7 +865,7 @@ class Instaloader: return True date_local = item.date_local - dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target) + dirname = _PostPathFormatter(item, self.sanitize_paths).format(self.dirname_pattern, target=target) filename_template = os.path.join(dirname, self.format_filename(item, target=target)) filename = self.__prepare_filename(filename_template, lambda: item.url) downloaded = False @@ -914,8 +933,9 @@ class Instaloader: name = user_highlight.owner_username highlight_target = (filename_target if filename_target - else (Path(_PostPathFormatter.sanitize_path(name)) / - _PostPathFormatter.sanitize_path(user_highlight.title))) # type: Union[str, Path] + else (Path(_PostPathFormatter.sanitize_path(name, self.sanitize_paths)) / + _PostPathFormatter.sanitize_path(user_highlight.title, + self.sanitize_paths))) # type: Union[str, Path] self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name)) self.download_highlight_cover(user_highlight, highlight_target) totalcount = user_highlight.itemcount @@ -965,7 +985,7 @@ class Instaloader: else total_count) sanitized_target = target if isinstance(target, str): - sanitized_target = _PostPathFormatter.sanitize_path(target) + sanitized_target = _PostPathFormatter.sanitize_path(target, self.sanitize_paths) if takewhile is None: takewhile = lambda _: True with resumable_iteration( @@ -1209,8 +1229,8 @@ class Instaloader: tagged_posts = profile.get_tagged_posts() self.posts_download_loop(tagged_posts, target if target - else (Path(_PostPathFormatter.sanitize_path(profile.username)) / - _PostPathFormatter.sanitize_path(':tagged')), + else (Path(_PostPathFormatter.sanitize_path(profile.username, self.sanitize_paths)) / + _PostPathFormatter.sanitize_path(':tagged', self.sanitize_paths)), fast_update, post_filter, takewhile=posts_takewhile) if latest_stamps is not None and tagged_posts.first_item is not None: latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local) From d938f3e2f55051b65997d734bec6512dd8dcc9cd Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 19 Mar 2022 16:35:53 +0100 Subject: [PATCH 10/12] Third Beta Release for Version 4.9 --- instaloader/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instaloader/__init__.py b/instaloader/__init__.py index 767ac6f..737b630 100644 --- a/instaloader/__init__.py +++ b/instaloader/__init__.py @@ -1,7 +1,7 @@ """Download pictures (or videos) along with their captions and other metadata from Instagram.""" -__version__ = '4.9b2' +__version__ = '4.9b3' try: From fbd7df2a90c459807cd8af3e8bf881529082c8f0 Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 26 Mar 2022 16:59:42 +0100 Subject: [PATCH 11/12] Use resumable iteration to download hashtags Closes #1457. --- docs/cli-options.rst | 3 ++- instaloader/instaloader.py | 6 +++--- instaloader/structures.py | 7 +++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/cli-options.rst b/docs/cli-options.rst index 88b4408..f8a187f 100644 --- a/docs/cli-options.rst +++ b/docs/cli-options.rst @@ -258,7 +258,8 @@ How to Download - Profile posts, - Profile IGTV posts (:option:`--igtv`), - Profile tagged posts (:option:`--tagged`), - - Saved posts (``:saved``). + - Saved posts (``:saved``), + - Hashtags. This feature is enabled by default for targets where it is supported; :option:`--resume-prefix` only changes the name of the iterator files. diff --git a/instaloader/instaloader.py b/instaloader/instaloader.py index 9371c3f..31f8c90 100644 --- a/instaloader/instaloader.py +++ b/instaloader/instaloader.py @@ -1176,8 +1176,8 @@ class Instaloader: """Get Posts associated with a #hashtag. .. deprecated:: 4.4 - Use :meth:`Hashtag.get_posts`.""" - return Hashtag.from_name(self.context, hashtag).get_posts() + Use :meth:`Hashtag.get_posts_resumable`.""" + return Hashtag.from_name(self.context, hashtag).get_posts_resumable() def download_hashtag(self, hashtag: Union[Hashtag, str], max_count: Optional[int] = None, @@ -1213,7 +1213,7 @@ class Instaloader: self.download_hashtag_profilepic(hashtag) if posts: self.context.log("Retrieving pictures with hashtag #{}...".format(hashtag.name)) - self.posts_download_loop(hashtag.get_all_posts(), target, fast_update, post_filter, + self.posts_download_loop(hashtag.get_posts_resumable(), target, fast_update, post_filter, max_count=max_count) if self.save_metadata: json_filename = '{0}/{1}'.format(self.dirname_pattern.format(profile=target, diff --git a/instaloader/structures.py b/instaloader/structures.py index 11f1da7..eb638d0 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -1553,7 +1553,10 @@ class Hashtag: return self._metadata("media_count") def get_posts(self) -> Iterator[Post]: - """Yields the recent posts associated with this hashtag.""" + """Yields the recent posts associated with this hashtag. + + .. deprecated:: 4.9 + Use :meth:`Hashtag.get_posts_resumable` as this method may return incorrect results (:issue:`1457`)""" try: self._metadata("edge_hashtag_to_media", "edges") self._metadata("edge_hashtag_to_media", "page_info") @@ -1575,7 +1578,7 @@ class Hashtag: def get_all_posts(self) -> Iterator[Post]: """Yields all posts, i.e. all most recent posts and the top posts, in almost-chronological order.""" sorted_top_posts = iter(sorted(islice(self.get_top_posts(), 9), key=lambda p: p.date_utc, reverse=True)) - other_posts = self.get_posts() + other_posts = self.get_posts_resumable() next_top = next(sorted_top_posts, None) next_other = next(other_posts, None) while next_top is not None or next_other is not None: From c0e5d0475bff1507c930d4ceefb5d66686caee5d Mon Sep 17 00:00:00 2001 From: Alexander Graf <17130992+aandergr@users.noreply.github.com> Date: Sat, 26 Mar 2022 17:03:57 +0100 Subject: [PATCH 12/12] StoryItem.from_mediaid() Addresses #1238. --- instaloader/structures.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/instaloader/structures.py b/instaloader/structures.py index eb638d0..8b5e986 100644 --- a/instaloader/structures.py +++ b/instaloader/structures.py @@ -1115,6 +1115,21 @@ class StoryItem: def __hash__(self) -> int: return hash(self.mediaid) + @classmethod + def from_mediaid(cls, context: InstaloaderContext, mediaid: int): + """Create a StoryItem object from a given mediaid. + + .. versionadded:: 4.9 + """ + pic_json = context.graphql_query( + '2b0673e0dc4580674a88d426fe00ea90', + {'shortcode': Post.mediaid_to_shortcode(mediaid)} + ) + shortcode_media = pic_json['data']['shortcode_media'] + if shortcode_media is None: + raise BadResponseException("Fetching StoryItem metadata failed.") + return cls(context, shortcode_media) + @property def _iphone_struct(self) -> Dict[str, Any]: if not self._context.iphone_support: