Merge branch 'upcoming/v4.9'

Alexander Graf 2022-03-26 17:15:09 +01:00
commit 385c6c8a35
9 changed files with 374 additions and 154 deletions

Pipfile

@@ -12,6 +12,8 @@ pefile = "*"
 pywin32-ctypes = "*"
 psutil = "*"
 typing-extensions = "*"
+types-requests = "*"
+typed-ast = "*"

 [packages]
 requests = "*"

Pipfile.lock  (generated; 168 changed lines)

@ -1,7 +1,7 @@
{ {
"_meta": { "_meta": {
"hash": { "hash": {
"sha256": "9704331c2f9e76af9e5ee386bc359a46ee66d1e2f93aa880d9d148da7ba0e6f5" "sha256": "4ea11d3dd7ae0070a8745cf21960210a09834ca21f47905fd3fab20ea68c6d3b"
}, },
"pipfile-spec": 6, "pipfile-spec": 6,
"requires": {}, "requires": {},
@ -21,35 +21,36 @@
], ],
"version": "==2021.5.30" "version": "==2021.5.30"
}, },
"chardet": { "charset-normalizer": {
"hashes": [ "hashes": [
"sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", "sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b",
"sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" "sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3"
], ],
"version": "==4.0.0" "markers": "python_version >= '3'",
"version": "==2.0.4"
}, },
"idna": { "idna": {
"hashes": [ "hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
], ],
"version": "==2.10" "markers": "python_version >= '3'",
"version": "==3.2"
}, },
"requests": { "requests": {
"hashes": [ "hashes": [
"sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24",
"sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"
], ],
"index": "pypi", "index": "pypi",
"version": "==2.25.1" "version": "==2.26.0"
}, },
"urllib3": { "urllib3": {
"hashes": [ "hashes": [
"sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c", "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
"sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098" "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
], ],
"index": "pypi", "version": "==1.26.6"
"version": "==1.26.5"
} }
}, },
"develop": { "develop": {
@ -69,10 +70,10 @@
}, },
"astroid": { "astroid": {
"hashes": [ "hashes": [
"sha256:3c9a2d84354185d13213ff2640ec03d39168dbcd13648abc84fb13ca3b2e2761", "sha256:3975a0bd5373bdce166e60c851cfcbaf21ee96de80ec518c1f4cb3e94c3fb334",
"sha256:d66a600e1602736a0f24f725a511b0e50d12eb18f54b31ec276d2c26a0a62c6a" "sha256:ab7f36e8a78b8e54a62028ba6beef7561db4cdb6f2a5009ecc44a6f42b5697ef"
], ],
"version": "==2.5.7" "version": "==2.6.6"
}, },
"babel": { "babel": {
"hashes": [ "hashes": [
@ -88,12 +89,13 @@
], ],
"version": "==2021.5.30" "version": "==2021.5.30"
}, },
"chardet": { "charset-normalizer": {
"hashes": [ "hashes": [
"sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", "sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b",
"sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" "sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3"
], ],
"version": "==4.0.0" "markers": "python_version >= '3'",
"version": "==2.0.4"
}, },
"docutils": { "docutils": {
"hashes": [ "hashes": [
@ -110,10 +112,11 @@
}, },
"idna": { "idna": {
"hashes": [ "hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
], ],
"version": "==2.10" "markers": "python_version >= '3'",
"version": "==3.2"
}, },
"imagesize": { "imagesize": {
"hashes": [ "hashes": [
@ -124,10 +127,10 @@
}, },
"isort": { "isort": {
"hashes": [ "hashes": [
"sha256:0a943902919f65c5684ac4e0154b1ad4fac6dcaa5d9f3426b732f1c8b5419be6", "sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899",
"sha256:2bb1680aad211e3c9944dbce1d4ba09a989f04e238296c87fe2139faa26d655d" "sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2"
], ],
"version": "==5.8.0" "version": "==5.9.3"
}, },
"jinja2": { "jinja2": {
"hashes": [ "hashes": [
@ -211,31 +214,32 @@
}, },
"mypy": { "mypy": {
"hashes": [ "hashes": [
"sha256:0d0a87c0e7e3a9becdfbe936c981d32e5ee0ccda3e0f07e1ef2c3d1a817cf73e", "sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9",
"sha256:25adde9b862f8f9aac9d2d11971f226bd4c8fbaa89fb76bdadb267ef22d10064", "sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a",
"sha256:28fb5479c494b1bab244620685e2eb3c3f988d71fd5d64cc753195e8ed53df7c", "sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9",
"sha256:2f9b3407c58347a452fc0736861593e105139b905cca7d097e413453a1d650b4", "sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e",
"sha256:33f159443db0829d16f0a8d83d94df3109bb6dd801975fe86bacb9bf71628e97", "sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2",
"sha256:3f2aca7f68580dc2508289c729bd49ee929a436208d2b2b6aab15745a70a57df", "sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212",
"sha256:499c798053cdebcaa916eef8cd733e5584b5909f789de856b482cd7d069bdad8", "sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b",
"sha256:4eec37370483331d13514c3f55f446fc5248d6373e7029a29ecb7b7494851e7a", "sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885",
"sha256:552a815579aa1e995f39fd05dde6cd378e191b063f031f2acfe73ce9fb7f9e56", "sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150",
"sha256:5873888fff1c7cf5b71efbe80e0e73153fe9212fafdf8e44adfe4c20ec9f82d7", "sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703",
"sha256:61a3d5b97955422964be6b3baf05ff2ce7f26f52c85dd88db11d5e03e146a3a6", "sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072",
"sha256:674e822aa665b9fd75130c6c5f5ed9564a38c6cea6a6432ce47eafb68ee578c5", "sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457",
"sha256:7ce3175801d0ae5fdfa79b4f0cfed08807af4d075b402b7e294e6aa72af9aa2a", "sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e",
"sha256:9743c91088d396c1a5a3c9978354b61b0382b4e3c440ce83cf77994a43e8c521", "sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0",
"sha256:9f94aac67a2045ec719ffe6111df543bac7874cee01f41928f6969756e030564", "sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb",
"sha256:a26f8ec704e5a7423c8824d425086705e381b4f1dfdef6e3a1edab7ba174ec49", "sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97",
"sha256:abf7e0c3cf117c44d9285cc6128856106183938c68fd4944763003decdcfeb66", "sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8",
"sha256:b09669bcda124e83708f34a94606e01b614fa71931d356c1f1a5297ba11f110a", "sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811",
"sha256:cd07039aa5df222037005b08fbbfd69b3ab0b0bd7a07d7906de75ae52c4e3119", "sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6",
"sha256:d23e0ea196702d918b60c8288561e722bf437d82cb7ef2edcd98cfa38905d506", "sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de",
"sha256:d65cc1df038ef55a99e617431f0553cd77763869eebdf9042403e16089fe746c", "sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504",
"sha256:d7da2e1d5f558c37d6e8c1246f1aec1e7349e4913d8fb3cb289a35de573fe2eb" "sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921",
"sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d"
], ],
"index": "pypi", "index": "pypi",
"version": "==0.812" "version": "==0.910"
}, },
"mypy-extensions": { "mypy-extensions": {
"hashes": [ "hashes": [
@ -246,17 +250,17 @@
}, },
"packaging": { "packaging": {
"hashes": [ "hashes": [
"sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5", "sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7",
"sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a" "sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14"
], ],
"version": "==20.9" "version": "==21.0"
}, },
"pefile": { "pefile": {
"hashes": [ "hashes": [
"sha256:a5d6e8305c6b210849b47a6174ddf9c452b2888340b8177874b862ba6c207645" "sha256:ed79b2353daa58421459abf4d685953bde0adf9f6e188944f97ba9795f100246"
], ],
"index": "pypi", "index": "pypi",
"version": "==2019.4.18" "version": "==2021.5.24"
}, },
"psutil": { "psutil": {
"hashes": [ "hashes": [
@ -301,25 +305,31 @@
}, },
"pyinstaller": { "pyinstaller": {
"hashes": [ "hashes": [
"sha256:f5c0eeb2aa663cce9a5404292c0195011fa500a6501c873a466b2e8cad3c950c" "sha256:30733baaf8971902286a0ddf77e5499ac5f7bf8e7c39163e83d4f8c696ef265e",
"sha256:4d848cd782ee0893d7ad9fe2bfe535206a79f0b6760cecc5f2add831258b9322",
"sha256:8f747b190e6ad30e2d2fd5da9a64636f61aac8c038c0b7f685efa92c782ea14f",
"sha256:aae456205c68355f9597411090576bb31b614e53976b4c102d072bbe5db8392a",
"sha256:c587da8f521a7ce1b9efb4e3d0117cd63c92dc6cedff24590aeef89372f53012",
"sha256:ecc2baadeeefd2b6fbf39d13c65d4aa603afdda1c6aaaebc4577ba72893fee9e",
"sha256:fed9f5e4802769a416a8f2ca171c6be961d1861cc05a0b71d20dfe05423137e9"
], ],
"index": "pypi", "index": "pypi",
"version": "==4.2" "version": "==4.5.1"
}, },
"pyinstaller-hooks-contrib": { "pyinstaller-hooks-contrib": {
"hashes": [ "hashes": [
"sha256:27558072021857d89524c42136feaa2ffe4f003f1bdf0278f9b24f6902c1759c", "sha256:57964f93eb69255c49159ffdf052aae893feed223b0f69773dfd010ca6c569d9",
"sha256:892310e6363655838485ee748bf1c5e5cade7963686d9af8650ee218a3e0b031" "sha256:7f5d0689b30da3092149fc536a835a94045ac8c9f0e6dfb23ac171890f5ea8f2"
], ],
"version": "==2021.1" "version": "==2021.2"
}, },
"pylint": { "pylint": {
"hashes": [ "hashes": [
"sha256:0e21d3b80b96740909d77206d741aa3ce0b06b41be375d92e1f3244a274c1f8a", "sha256:2e1a0eb2e8ab41d6b5dbada87f066492bb1557b12b76c47c2ee8aa8a11186594",
"sha256:d09b0b07ba06bcdff463958f53f23df25e740ecd81895f7d2699ec04bbd8dc3b" "sha256:8b838c8983ee1904b2de66cce9d0b96649a91901350e956d78f289c3bc87b48e"
], ],
"index": "pypi", "index": "pypi",
"version": "==2.7.2" "version": "==2.9.6"
}, },
"pyparsing": { "pyparsing": {
"hashes": [ "hashes": [
@ -345,11 +355,11 @@
}, },
"requests": { "requests": {
"hashes": [ "hashes": [
"sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24",
"sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"
], ],
"index": "pypi", "index": "pypi",
"version": "==2.25.1" "version": "==2.26.0"
}, },
"snowballstemmer": { "snowballstemmer": {
"hashes": [ "hashes": [
@ -448,24 +458,32 @@
"sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f", "sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f",
"sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65" "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"
], ],
"index": "pypi",
"version": "==1.4.3" "version": "==1.4.3"
}, },
"types-requests": {
"hashes": [
"sha256:a5a305b43ea57bf64d6731f89816946a405b591eff6de28d4c0fd58422cee779",
"sha256:e21541c0f55c066c491a639309159556dd8c5833e49fcde929c4c47bdb0002ee"
],
"index": "pypi",
"version": "==2.25.6"
},
"typing-extensions": { "typing-extensions": {
"hashes": [ "hashes": [
"sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918", "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497",
"sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c", "sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342",
"sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f" "sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"
], ],
"index": "pypi", "index": "pypi",
"version": "==3.7.4.3" "version": "==3.10.0.0"
}, },
"urllib3": { "urllib3": {
"hashes": [ "hashes": [
"sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c", "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
"sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098" "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
], ],
"index": "pypi", "version": "==1.26.6"
"version": "==1.26.5"
}, },
"wrapt": { "wrapt": {
"hashes": [ "hashes": [

docs/cli-options.rst

@@ -239,6 +239,13 @@ How to Download

    .. versionadded:: 4.8

+.. option:: --sanitize-paths
+
+   Force sanitization of paths so that the resulting file and directory names
+   are valid on both Windows and Unix.
+
+   .. versionadded:: 4.9
+
 .. option:: --resume-prefix prefix

    For many targets, Instaloader is capable of resuming a previously-aborted

@@ -251,7 +258,8 @@ How to Download
    - Profile posts,
    - Profile IGTV posts (:option:`--igtv`),
    - Profile tagged posts (:option:`--tagged`),
-   - Saved posts (``:saved``).
+   - Saved posts (``:saved``),
+   - Hashtags.

    This feature is enabled by default for targets where it is supported;
    :option:`--resume-prefix` only changes the name of the iterator files.

@@ -273,7 +281,7 @@ How to Download
 .. option:: --user-agent USER_AGENT

    User Agent to use for HTTP requests. Per default, Instaloader pretends being
-   Chrome/89 on Linux.
+   Chrome/92 on Linux.

 .. option:: --max-connection-attempts N
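A minimal sketch of the Python-API equivalent of the new --sanitize-paths option, for orientation (the profile name is a placeholder):

    import instaloader

    # Same effect as `instaloader --sanitize-paths your_profile_name` on the command line:
    L = instaloader.Instaloader(sanitize_paths=True)
    profile = instaloader.Profile.from_username(L.context, "your_profile_name")
    L.download_profiles({profile}, posts=True)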

instaloader/__init__.py

@@ -1,7 +1,7 @@
 """Download pictures (or videos) along with their captions and other metadata from Instagram."""

-__version__ = '4.8.5'
+__version__ = '4.9b3'


 try:

instaloader/__main__.py

@@ -383,6 +383,9 @@ def main():
     g_how.add_argument('--resume-prefix', metavar='PREFIX',
                        help='Prefix for filenames that are used to save the information to resume an interrupted '
                             'download.')
+    g_how.add_argument('--sanitize-paths', action='store_true',
+                       help='Sanitize paths so that the resulting file and directory names are valid on both '
+                            'Windows and Unix.')
     g_how.add_argument('--no-resume', action='store_true',
                        help='Do not resume a previously-aborted download iteration, and do not save such information '
                             'when interrupted.')

@@ -463,7 +466,8 @@ def main():
                              slide=args.slide,
                              fatal_status_codes=args.abort_on,
                              iphone_support=not args.no_iphone,
-                             title_pattern=args.title_pattern)
+                             title_pattern=args.title_pattern,
+                             sanitize_paths=args.sanitize_paths)
     _main(loader,
           args.profile,
           username=args.login.lower() if args.login is not None else None,

instaloader/instaloader.py

@@ -22,6 +22,7 @@ from .exceptions import *
 from .instaloadercontext import InstaloaderContext, RateController
 from .lateststamps import LatestStamps
 from .nodeiterator import NodeIterator, resumable_iteration
+from .sectioniterator import SectionIterator
 from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
                          load_structure_from_file, save_structure_to_file, PostSidecarNode, TitlePic)
@@ -136,20 +137,38 @@ class _ArbitraryItemFormatter(string.Formatter):

 class _PostPathFormatter(_ArbitraryItemFormatter):
+    RESERVED: set = {'CON', 'PRN', 'AUX', 'NUL',
+                     'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
+                     'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'}
+
+    def __init__(self, item: Any, force_windows_path: bool = False):
+        super().__init__(item)
+        self.force_windows_path = force_windows_path
+
     def get_value(self, key, args, kwargs):
         ret = super().get_value(key, args, kwargs)
         if not isinstance(ret, str):
             return ret
-        return self.sanitize_path(ret)
+        return self.sanitize_path(ret, self.force_windows_path)

     @staticmethod
-    def sanitize_path(ret: str) -> str:
+    def sanitize_path(ret: str, force_windows_path: bool = False) -> str:
         """Replaces '/' with similar looking Division Slash and some other illegal filename characters on Windows."""
         ret = ret.replace('/', '\u2215')
-        if platform.system() == 'Windows':
+
+        if ret.startswith('.'):
+            ret = ret.replace('.', '\u2024', 1)
+
+        if force_windows_path or platform.system() == 'Windows':
             ret = ret.replace(':', '\uff1a').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02')
             ret = ret.replace('\\', '\ufe68').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a')
             ret = ret.replace('\n', ' ').replace('\r', ' ')
+            root, ext = os.path.splitext(ret)
+            if root.upper() in _PostPathFormatter.RESERVED:
+                root += '_'
+            if ext == '.':
+                ext = '\u2024'
+            ret = root + ext
+
         return ret
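A rough sketch of what the extended sanitization does; the expected outputs below are approximate and assume the reserved-name handling only runs on Windows or with force_windows_path=True, as reconstructed above:

    from instaloader.instaloader import _PostPathFormatter

    print(_PostPathFormatter.sanitize_path("a/b"))                            # '/' becomes a Division Slash
    print(_PostPathFormatter.sanitize_path(".hidden"))                        # leading '.' becomes One Dot Leader
    print(_PostPathFormatter.sanitize_path("aux", force_windows_path=True))   # reserved device name gains '_' -> 'aux_'
    print(_PostPathFormatter.sanitize_path("a:b", force_windows_path=True))   # ':' becomes a fullwidth colon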
@@ -182,6 +201,7 @@ class Instaloader:
     :param slide: :option:`--slide`
     :param fatal_status_codes: :option:`--abort-on`
     :param iphone_support: not :option:`--no-iphone`
+    :param sanitize_paths: :option:`--sanitize-paths`

     .. attribute:: context

@@ -211,7 +231,8 @@ class Instaloader:
                  slide: Optional[str] = None,
                  fatal_status_codes: Optional[List[int]] = None,
                  iphone_support: bool = True,
-                 title_pattern: Optional[str] = None):
+                 title_pattern: Optional[str] = None,
+                 sanitize_paths: bool = False):

         self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
                                           request_timeout, rate_controller, fatal_status_codes,

@@ -228,6 +249,7 @@ class Instaloader:
             self.title_pattern = '{date_utc}_UTC_{typename}'
         else:
             self.title_pattern = '{target}_{date_utc}_UTC_{typename}'
+        self.sanitize_paths = sanitize_paths
         self.download_pictures = download_pictures
         self.download_videos = download_videos
         self.download_video_thumbnails = download_video_thumbnails
@@ -291,7 +313,8 @@ class Instaloader:
                 check_resume_bbd=self.check_resume_bbd,
                 slide=self.slide,
                 fatal_status_codes=self.context.fatal_status_codes,
-                iphone_support=self.context.iphone_support)
+                iphone_support=self.context.iphone_support,
+                sanitize_paths=self.sanitize_paths)
             yield new_loader
             self.context.error_log.extend(new_loader.context.error_log)
             new_loader.context.error_log = []  # avoid double-printing of errors

@@ -511,9 +534,10 @@ class Instaloader:
         pic_bytes = http_response.content
         ig_filename = url.split('/')[-1].split('?')[0]
         pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object)
-        dirname = _PostPathFormatter(pic_data).format(self.dirname_pattern, target=target)
-        filename_template = os.path.join(dirname,
-                                         _PostPathFormatter(pic_data).format(self.title_pattern, target=target))
+        dirname = _PostPathFormatter(pic_data, self.sanitize_paths).format(self.dirname_pattern, target=target)
+        filename_template = os.path.join(
+            dirname,
+            _PostPathFormatter(pic_data, self.sanitize_paths).format(self.title_pattern, target=target))
         filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg"
         content_length = http_response.headers.get('Content-Length', None)
         if os.path.isfile(filename) and (not self.context.is_logged_in or
@@ -638,7 +662,7 @@ class Instaloader:
         """Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter.

         .. versionadded:: 4.1"""
-        return _PostPathFormatter(item).format(self.filename_pattern, target=target)
+        return _PostPathFormatter(item, self.sanitize_paths).format(self.filename_pattern, target=target)

     def download_post(self, post: Post, target: Union[str, Path]) -> bool:
         """

@@ -670,7 +694,7 @@ class Instaloader:
                 return False
             return True

-        dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
+        dirname = _PostPathFormatter(post, self.sanitize_paths).format(self.dirname_pattern, target=target)
         filename_template = os.path.join(dirname, self.format_filename(post, target=target))
         filename = self.__prepare_filename(filename_template, lambda: post.url)

@@ -821,7 +845,7 @@ class Instaloader:
                 last_scraped = latest_stamps.get_last_story_timestamp(name)
                 scraped_timestamp = datetime.now().astimezone()
             for item in user_story.get_items():
-                if latest_stamps is not None and item.date_utc.replace(tzinfo=timezone.utc) <= last_scraped:
+                if latest_stamps is not None and item.date_local <= last_scraped:
                     break
                 if storyitem_filter is not None and not storyitem_filter(item):
                     self.context.log("<{} skipped>".format(item), flush=True)
@@ -851,7 +875,7 @@ class Instaloader:
             return True

         date_local = item.date_local
-        dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
+        dirname = _PostPathFormatter(item, self.sanitize_paths).format(self.dirname_pattern, target=target)
         filename_template = os.path.join(dirname, self.format_filename(item, target=target))
         filename = self.__prepare_filename(filename_template, lambda: item.url)
         downloaded = False

@@ -919,8 +943,9 @@ class Instaloader:
             name = user_highlight.owner_username
             highlight_target = (filename_target
                                 if filename_target
-                                else (Path(_PostPathFormatter.sanitize_path(name)) /
-                                      _PostPathFormatter.sanitize_path(user_highlight.title)))  # type: Union[str, Path]
+                                else (Path(_PostPathFormatter.sanitize_path(name, self.sanitize_paths)) /
+                                      _PostPathFormatter.sanitize_path(user_highlight.title,
+                                                                       self.sanitize_paths)))  # type: Union[str, Path]
             self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name))
             self.download_highlight_cover(user_highlight, highlight_target)
             totalcount = user_highlight.itemcount

@@ -970,7 +995,7 @@ class Instaloader:
                          else total_count)
         sanitized_target = target
         if isinstance(target, str):
-            sanitized_target = _PostPathFormatter.sanitize_path(target)
+            sanitized_target = _PostPathFormatter.sanitize_path(target, self.sanitize_paths)
         if takewhile is None:
             takewhile = lambda _: True
         with resumable_iteration(
@@ -1097,18 +1122,12 @@ class Instaloader:
         .. versionchanged:: 4.2.9
            Require being logged in (as required by Instagram)
         """
-        has_next_page = True
-        end_cursor = None
-        while has_next_page:
-            if end_cursor:
-                params = {'__a': 1, 'max_id': end_cursor}
-            else:
-                params = {'__a': 1}
-            location_data = self.context.get_json('explore/locations/{0}/'.format(location),
-                                                  params)['graphql']['location']['edge_location_to_media']
-            yield from (Post(self.context, edge['node']) for edge in location_data['edges'])
-            has_next_page = location_data['page_info']['has_next_page']
-            end_cursor = location_data['page_info']['end_cursor']
+        yield from SectionIterator(
+            self.context,
+            lambda d: d["native_location_data"]["recent"],
+            lambda m: Post.from_iphone_struct(self.context, m),
+            f"explore/locations/{location}/",
+        )

     @_requires_login
     def download_location(self, location: str,
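For orientation, a hedged sketch of iterating a location feed through the new SectionIterator-backed method (credentials and the location id are placeholders; location feeds require being logged in):

    from itertools import islice

    import instaloader

    L = instaloader.Instaloader()
    L.login("your_username", "your_password")                      # placeholder credentials
    for post in islice(L.get_location_posts("1017812222"), 5):     # placeholder location id
        print(post.shortcode, post.date_utc)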
@@ -1157,8 +1176,8 @@ class Instaloader:
         """Get Posts associated with a #hashtag.

         .. deprecated:: 4.4
-           Use :meth:`Hashtag.get_posts`."""
-        return Hashtag.from_name(self.context, hashtag).get_posts()
+           Use :meth:`Hashtag.get_posts_resumable`."""
+        return Hashtag.from_name(self.context, hashtag).get_posts_resumable()

     def download_hashtag(self, hashtag: Union[Hashtag, str],
                          max_count: Optional[int] = None,
@@ -1194,7 +1213,7 @@ class Instaloader:
             self.download_hashtag_profilepic(hashtag)
         if posts:
             self.context.log("Retrieving pictures with hashtag #{}...".format(hashtag.name))
-            self.posts_download_loop(hashtag.get_all_posts(), target, fast_update, post_filter,
+            self.posts_download_loop(hashtag.get_posts_resumable(), target, fast_update, post_filter,
                                      max_count=max_count)
         if self.save_metadata:
             json_filename = '{0}/{1}'.format(self.dirname_pattern.format(profile=target,

@@ -1216,15 +1235,15 @@ class Instaloader:
         posts_takewhile: Optional[Callable[[Post], bool]] = None
         if latest_stamps is not None:
             last_scraped = latest_stamps.get_last_tagged_timestamp(profile.username)
-            posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped
+            posts_takewhile = lambda p: p.date_local > last_scraped
         tagged_posts = profile.get_tagged_posts()
         self.posts_download_loop(tagged_posts,
                                  target if target
-                                 else (Path(_PostPathFormatter.sanitize_path(profile.username)) /
-                                       _PostPathFormatter.sanitize_path(':tagged')),
+                                 else (Path(_PostPathFormatter.sanitize_path(profile.username, self.sanitize_paths)) /
+                                       _PostPathFormatter.sanitize_path(':tagged', self.sanitize_paths)),
                                  fast_update, post_filter, takewhile=posts_takewhile)
         if latest_stamps is not None and tagged_posts.first_item is not None:
-            latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local.astimezone())
+            latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local)

     def download_igtv(self, profile: Profile, fast_update: bool = False,
                       post_filter: Optional[Callable[[Post], bool]] = None,
@@ -1239,12 +1258,12 @@ class Instaloader:
         posts_takewhile: Optional[Callable[[Post], bool]] = None
         if latest_stamps is not None:
             last_scraped = latest_stamps.get_last_igtv_timestamp(profile.username)
-            posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped
+            posts_takewhile = lambda p: p.date_local > last_scraped
         igtv_posts = profile.get_igtv_posts()
         self.posts_download_loop(igtv_posts, profile.username, fast_update, post_filter,
                                  total_count=profile.igtvcount, owner_profile=profile, takewhile=posts_takewhile)
         if latest_stamps is not None and igtv_posts.first_item is not None:
-            latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local.astimezone())
+            latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local)

     def _get_id_filename(self, profile_name: str) -> str:
         if ((format_string_contains_key(self.dirname_pattern, 'profile') or
@@ -1437,14 +1456,14 @@ class Instaloader:
                 if latest_stamps is not None:
                     # pylint:disable=cell-var-from-loop
                     last_scraped = latest_stamps.get_last_post_timestamp(profile_name)
-                    posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped
+                    posts_takewhile = lambda p: p.date_local > last_scraped
                 posts_to_download = profile.get_posts()
                 self.posts_download_loop(posts_to_download, profile_name, fast_update, post_filter,
                                          total_count=profile.mediacount, owner_profile=profile,
                                          takewhile=posts_takewhile)
                 if latest_stamps is not None and posts_to_download.first_item is not None:
                     latest_stamps.set_last_post_timestamp(profile_name,
-                                                          posts_to_download.first_item.date_local.astimezone())
+                                                          posts_to_download.first_item.date_local)

         if stories and profiles:
             with self.context.error_catcher("Download stories"):

instaloader/instaloadercontext.py

@@ -33,7 +33,7 @@ def copy_session(session: requests.Session, request_timeout: Optional[float] = N

 def default_user_agent() -> str:
     return 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
-           '(KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
+           '(KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'


 class InstaloaderContext:

instaloader/sectioniterator.py  (new file)

@@ -0,0 +1,46 @@
+from typing import Any, Callable, Dict, Iterator, Optional, TypeVar
+
+from .instaloadercontext import InstaloaderContext
+
+T = TypeVar('T')
+
+
+class SectionIterator(Iterator[T]):
+    """Iterator for the new 'sections'-style responses.
+
+    .. versionadded:: 4.9"""
+    def __init__(self,
+                 context: InstaloaderContext,
+                 sections_extractor: Callable[[Dict[str, Any]], Dict[str, Any]],
+                 media_wrapper: Callable[[Dict], T],
+                 query_path: str,
+                 first_data: Optional[Dict[str, Any]] = None):
+        self._context = context
+        self._sections_extractor = sections_extractor
+        self._media_wrapper = media_wrapper
+        self._query_path = query_path
+        self._data = first_data or self._query()
+        self._page_index = 0
+        self._section_index = 0
+
+    def __iter__(self):
+        return self
+
+    def _query(self, max_id: Optional[str] = None) -> Dict[str, Any]:
+        pagination_variables = {"max_id": max_id} if max_id is not None else {}
+        return self._sections_extractor(
+            self._context.get_json(self._query_path, params={"__a": 1, **pagination_variables})
+        )
+
+    def __next__(self) -> T:
+        if self._page_index < len(self._data['sections']):
+            media = self._data['sections'][self._page_index]['layout_content']['medias'][self._section_index]['media']
+            self._section_index += 1
+            if self._section_index >= len(self._data['sections'][self._page_index]['layout_content']['medias']):
+                self._section_index = 0
+                self._page_index += 1
+            return self._media_wrapper(media)
+        if self._data['more_available']:
+            self._page_index, self._section_index, self._data = 0, 0, self._query(self._data["next_max_id"])
+            return self.__next__()
+        raise StopIteration()
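SectionIterator is an internal helper that is normally reached via Hashtag.get_posts() or Instaloader.get_location_posts(). Purely as an illustrative sketch, a direct use mirroring the call sites elsewhere in this commit (the hashtag endpoint is a placeholder and anonymous access may be refused by Instagram):

    from itertools import islice

    import instaloader
    from instaloader.sectioniterator import SectionIterator

    L = instaloader.Instaloader()
    posts = SectionIterator(
        L.context,
        lambda d: d["data"]["recent"],                                # where the sections sit in the response
        lambda m: instaloader.Post.from_iphone_struct(L.context, m),  # wrap each raw media dict
        "explore/tags/nature/",                                       # placeholder hashtag endpoint
    )
    for post in islice(posts, 3):
        print(post.shortcode)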

instaloader/structures.py

@@ -3,7 +3,9 @@ import lzma
 import re
 from base64 import b64decode, b64encode
 from collections import namedtuple
+from contextlib import suppress
 from datetime import datetime
+from itertools import islice
 from pathlib import Path
 from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
 from unicodedata import normalize

@@ -12,6 +14,7 @@ from . import __version__
 from .exceptions import *
 from .instaloadercontext import InstaloaderContext
 from .nodeiterator import FrozenNodeIterator, NodeIterator
+from .sectioniterator import SectionIterator

 PostSidecarNode = namedtuple('PostSidecarNode', ['is_video', 'display_url', 'video_url'])
 PostSidecarNode.__doc__ = "Item of a Sidecar Post."
@@ -90,6 +93,41 @@ class Post:
         """Create a post object from a given mediaid"""
         return cls.from_shortcode(context, Post.mediaid_to_shortcode(mediaid))

+    @classmethod
+    def from_iphone_struct(cls, context: InstaloaderContext, media: Dict[str, Any]):
+        """Create a post from a given iphone_struct.
+
+        .. versionadded:: 4.9"""
+        media_types = {
+            1: "GraphImage",
+            2: "GraphVideo",
+            8: "GraphSidecar",
+        }
+        fake_node = {
+            "shortcode": media["code"],
+            "id": media["pk"],
+            "__typename": media_types[media["media_type"]],
+            "is_video": media_types[media["media_type"]] == "GraphVideo",
+            "date": media["taken_at"],
+            "caption": media["caption"].get("text") if media.get("caption") is not None else None,
+            "title": media.get("title"),
+            "viewer_has_liked": media["has_liked"],
+            "edge_media_preview_like": {"count": media["like_count"]},
+            "iphone_struct": media,
+        }
+        with suppress(KeyError):
+            fake_node["display_url"] = media['image_versions2']['candidates'][0]['url']
+        with suppress(KeyError):
+            fake_node["video_url"] = media['video_versions'][-1]['url']
+            fake_node["video_duration"] = media["video_duration"]
+            fake_node["video_view_count"] = media["view_count"]
+        with suppress(KeyError):
+            fake_node["edge_sidecar_to_children"] = {"edges": [{"node": {
+                "display_url": node['image_versions2']['candidates'][0]['url'],
+                "is_video": media_types[node["media_type"]] == "GraphVideo",
+            }} for node in media["carousel_media"]]}
+        return cls(context, fake_node, Profile.from_iphone_struct(context, media["user"]) if "user" in media else None)
+
     @staticmethod
     def shortcode_to_mediaid(code: str) -> int:
         if len(code) > 11:
@@ -225,17 +263,16 @@ class Post:
     @property
     def date_local(self) -> datetime:
-        """Timestamp when the post was created (local time zone)."""
-        return datetime.fromtimestamp(self._node["date"]
-                                      if "date" in self._node
-                                      else self._node["taken_at_timestamp"])
+        """Timestamp when the post was created (local time zone).
+
+        .. versionchanged:: 4.9
+           Return timezone aware datetime object."""
+        return datetime.fromtimestamp(self._get_timestamp_date_created()).astimezone()

     @property
     def date_utc(self) -> datetime:
         """Timestamp when the post was created (UTC)."""
-        return datetime.utcfromtimestamp(self._node["date"]
-                                         if "date" in self._node
-                                         else self._node["taken_at_timestamp"])
+        return datetime.utcfromtimestamp(self._get_timestamp_date_created())

     @property
     def date(self) -> datetime:
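A small sketch of the behavioural difference introduced here (the shortcode is a placeholder):

    import instaloader

    L = instaloader.Instaloader()
    post = instaloader.Post.from_shortcode(L.context, "B0000000000")   # placeholder shortcode
    print(post.date_local.tzinfo)   # since 4.9 an aware datetime in the local timezone (tzinfo is set)
    print(post.date_utc.tzinfo)     # still a naive datetime carrying UTC wall-clock time (prints None)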
@@ -276,6 +313,12 @@ class Post:
             return len(edges)
         return 1

+    def _get_timestamp_date_created(self) -> float:
+        """Timestamp when the post was created"""
+        return (self._node["date"]
+                if "date" in self._node
+                else self._node["taken_at_timestamp"])
+
     def get_is_videos(self) -> List[bool]:
         """
         Return a list containing the ``is_video`` property for each media in the post.
@@ -360,6 +403,16 @@ class Post:
             return (pcaption[:30] + u"\u2026") if len(pcaption) > 31 else pcaption
         return _elliptify(self.caption) if self.caption else ''

+    @property
+    def accessibility_caption(self) -> Optional[str]:
+        """Accessibility caption of the post, if available.
+
+        .. versionadded:: 4.9"""
+        try:
+            return self._field("accessibility_caption")
+        except KeyError:
+            return None
+
     @property
     def tagged_users(self) -> List[str]:
         """List of all lowercased users that are tagged in the Post."""
@@ -666,6 +719,20 @@ class Profile:
         context.profile_id_cache[profile_id] = profile
         return profile

+    @classmethod
+    def from_iphone_struct(cls, context: InstaloaderContext, media: Dict[str, Any]):
+        """Create a profile from a given iphone_struct.
+
+        .. versionadded:: 4.9"""
+        return cls(context, {
+            "id": media["pk"],
+            "username": media["username"],
+            "is_private": media["is_private"],
+            "full_name": media["full_name"],
+            "profile_pic_url_hd": media["profile_pic_url"],
+            "iphone_struct": media,
+        })
+
     @classmethod
     def own_profile(cls, context: InstaloaderContext):
         """Return own profile if logged-in.
@@ -1048,6 +1115,21 @@ class StoryItem:
     def __hash__(self) -> int:
         return hash(self.mediaid)

+    @classmethod
+    def from_mediaid(cls, context: InstaloaderContext, mediaid: int):
+        """Create a StoryItem object from a given mediaid.
+
+        .. versionadded:: 4.9
+        """
+        pic_json = context.graphql_query(
+            '2b0673e0dc4580674a88d426fe00ea90',
+            {'shortcode': Post.mediaid_to_shortcode(mediaid)}
+        )
+        shortcode_media = pic_json['data']['shortcode_media']
+        if shortcode_media is None:
+            raise BadResponseException("Fetching StoryItem metadata failed.")
+        return cls(context, shortcode_media)
+
     @property
     def _iphone_struct(self) -> Dict[str, Any]:
         if not self._context.iphone_support:
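A hedged sketch of the new constructor (credentials and the media id are placeholders; fetching story metadata generally requires being logged in):

    import instaloader

    L = instaloader.Instaloader()
    L.login("your_username", "your_password")                                    # placeholder credentials
    item = instaloader.StoryItem.from_mediaid(L.context, 2718281828459045235)    # placeholder media id
    L.download_storyitem(item, target=item.owner_username)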
@@ -1079,8 +1161,11 @@ class StoryItem:
     @property
     def date_local(self) -> datetime:
-        """Timestamp when the StoryItem was created (local time zone)."""
-        return datetime.fromtimestamp(self._node['taken_at_timestamp'])
+        """Timestamp when the StoryItem was created (local time zone).
+
+        .. versionchanged:: 4.9
+           Return timezone aware datetime object."""
+        return datetime.fromtimestamp(self._node['taken_at_timestamp']).astimezone()

     @property
     def date_utc(self) -> datetime:

@@ -1360,6 +1445,9 @@ class Hashtag:
            L.download_post(post, target="#"+hashtag.name)

    Also, this class implements == and is hashable.
+
+    .. versionchanged:: 4.9
+       Removed ``get_related_tags()`` and ``is_top_media_only`` as these features were removed from Instagram.
    """
    def __init__(self, context: InstaloaderContext, node: Dict[str, Any]):
        assert "name" in node

@@ -1388,8 +1476,8 @@ class Hashtag:
         return self._node["name"].lower()

     def _query(self, params):
-        return self._context.get_json("explore/tags/{0}/".format(self.name),
-                                      params)["graphql"]["hashtag"]
+        json_response = self._context.get_json("explore/tags/{0}/".format(self.name), params)
+        return json_response["graphql"]["hashtag"] if "graphql" in json_response else json_response["data"]

     def _obtain_metadata(self):
         if not self._has_full_metadata:

@@ -1400,7 +1488,9 @@ class Hashtag:
         json_node = self._node.copy()
         # remove posts
         json_node.pop("edge_hashtag_to_top_posts", None)
+        json_node.pop("top", None)
         json_node.pop("edge_hashtag_to_media", None)
+        json_node.pop("recent", None)
         return json_node

     def __repr__(self):
@@ -1436,30 +1526,33 @@ class Hashtag:
         return self._metadata("profile_pic_url")

     @property
-    def description(self) -> str:
+    def description(self) -> Optional[str]:
         return self._metadata("description")

     @property
     def allow_following(self) -> bool:
-        return self._metadata("allow_following")
+        return bool(self._metadata("allow_following"))

     @property
     def is_following(self) -> bool:
-        return self._metadata("is_following")
-
-    @property
-    def is_top_media_only(self) -> bool:
-        return self._metadata("is_top_media_only")
-
-    def get_related_tags(self) -> Iterator["Hashtag"]:
-        """Yields similar hashtags."""
-        yield from (Hashtag(self._context, edge["node"])
-                    for edge in self._metadata("edge_hashtag_to_related_tags", "edges"))
+        try:
+            return self._metadata("is_following")
+        except KeyError:
+            return bool(self._metadata("following"))

     def get_top_posts(self) -> Iterator[Post]:
         """Yields the top posts of the hashtag."""
-        yield from (Post(self._context, edge["node"])
-                    for edge in self._metadata("edge_hashtag_to_top_posts", "edges"))
+        try:
+            yield from (Post(self._context, edge["node"])
+                        for edge in self._metadata("edge_hashtag_to_top_posts", "edges"))
+        except KeyError:
+            yield from SectionIterator(
+                self._context,
+                lambda d: d["data"]["top"],
+                lambda m: Post.from_iphone_struct(self._context, m),
+                f"explore/tags/{self.name}/",
+                self._metadata("top"),
+            )

     @property
     def mediacount(self) -> int:
@@ -1469,23 +1562,38 @@ class Hashtag:
        The number of posts with a certain hashtag may differ from the number of posts that can actually be accessed, as
        the hashtag count might include private posts
        """
-        return self._metadata("edge_hashtag_to_media", "count")
+        try:
+            return self._metadata("edge_hashtag_to_media", "count")
+        except KeyError:
+            return self._metadata("media_count")

     def get_posts(self) -> Iterator[Post]:
-        """Yields the posts associated with this hashtag."""
-        self._metadata("edge_hashtag_to_media", "edges")
-        self._metadata("edge_hashtag_to_media", "page_info")
-        conn = self._metadata("edge_hashtag_to_media")
-        yield from (Post(self._context, edge["node"]) for edge in conn["edges"])
-        while conn["page_info"]["has_next_page"]:
-            data = self._query({'__a': 1, 'max_id': conn["page_info"]["end_cursor"]})
-            conn = data["edge_hashtag_to_media"]
-            yield from (Post(self._context, edge["node"]) for edge in conn["edges"])
+        """Yields the recent posts associated with this hashtag.
+
+        .. deprecated:: 4.9
+           Use :meth:`Hashtag.get_posts_resumable` as this method may return incorrect results (:issue:`1457`)"""
+        try:
+            self._metadata("edge_hashtag_to_media", "edges")
+            self._metadata("edge_hashtag_to_media", "page_info")
+            conn = self._metadata("edge_hashtag_to_media")
+            yield from (Post(self._context, edge["node"]) for edge in conn["edges"])
+            while conn["page_info"]["has_next_page"]:
+                data = self._query({'__a': 1, 'max_id': conn["page_info"]["end_cursor"]})
+                conn = data["edge_hashtag_to_media"]
+                yield from (Post(self._context, edge["node"]) for edge in conn["edges"])
+        except KeyError:
+            yield from SectionIterator(
+                self._context,
+                lambda d: d["data"]["recent"],
+                lambda m: Post.from_iphone_struct(self._context, m),
+                f"explore/tags/{self.name}/",
+                self._metadata("recent"),
+            )

     def get_all_posts(self) -> Iterator[Post]:
         """Yields all posts, i.e. all most recent posts and the top posts, in almost-chronological order."""
-        sorted_top_posts = iter(sorted(self.get_top_posts(), key=lambda p: p.date_utc, reverse=True))
-        other_posts = self.get_posts()
+        sorted_top_posts = iter(sorted(islice(self.get_top_posts(), 9), key=lambda p: p.date_utc, reverse=True))
+        other_posts = self.get_posts_resumable()
         next_top = next(sorted_top_posts, None)
         next_other = next(other_posts, None)
         while next_top is not None or next_other is not None:
@@ -1511,6 +1619,20 @@ class Hashtag:
                 yield next_other
                 next_other = next(other_posts, None)

+    def get_posts_resumable(self) -> NodeIterator[Post]:
+        """Get the recent posts of the hashtag in a resumable fashion.
+
+        :rtype: NodeIterator[Post]
+
+        .. versionadded:: 4.9"""
+        return NodeIterator(
+            self._context, "9b498c08113f1e09617a1703c22b2f32",
+            lambda d: d['data']['hashtag']['edge_hashtag_to_media'],
+            lambda n: Post(self._context, n),
+            {'tag_name': self.name},
+            f"https://www.instagram.com/explore/tags/{self.name}/"
+        )
+

 class TopSearchResults:
     """
@@ -1714,6 +1836,7 @@ def load_structure_from_file(context: InstaloaderContext, filename: str) -> Json
     if compressed:
         fp = lzma.open(filename, 'rt')
     else:
+        # pylint:disable=consider-using-with
         fp = open(filename, 'rt')
     json_structure = json.load(fp)
     fp.close()
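For context, how the (de)compression path touched above is typically exercised (the shortcode is a placeholder; the .xz suffix selects the lzma branch):

    import instaloader

    L = instaloader.Instaloader()
    post = instaloader.Post.from_shortcode(L.context, "B0000000000")      # placeholder shortcode
    instaloader.save_structure_to_file(post, "post.json.xz")              # lzma-compressed because of the .xz suffix
    same_post = instaloader.load_structure_from_file(L.context, "post.json.xz")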