Merge branch 'upcoming/v4.9'

This commit is contained in:
Alexander Graf 2022-03-26 17:15:09 +01:00
commit 385c6c8a35
9 changed files with 374 additions and 154 deletions

View File

@ -12,6 +12,8 @@ pefile = "*"
pywin32-ctypes = "*"
psutil = "*"
typing-extensions = "*"
types-requests = "*"
typed-ast = "*"
[packages]
requests = "*"

168
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "9704331c2f9e76af9e5ee386bc359a46ee66d1e2f93aa880d9d148da7ba0e6f5"
"sha256": "4ea11d3dd7ae0070a8745cf21960210a09834ca21f47905fd3fab20ea68c6d3b"
},
"pipfile-spec": 6,
"requires": {},
@ -21,35 +21,36 @@
],
"version": "==2021.5.30"
},
"chardet": {
"charset-normalizer": {
"hashes": [
"sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa",
"sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"
"sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b",
"sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3"
],
"version": "==4.0.0"
"markers": "python_version >= '3'",
"version": "==2.0.4"
},
"idna": {
"hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
"sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
"sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
],
"version": "==2.10"
"markers": "python_version >= '3'",
"version": "==3.2"
},
"requests": {
"hashes": [
"sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804",
"sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"
"sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24",
"sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"
],
"index": "pypi",
"version": "==2.25.1"
"version": "==2.26.0"
},
"urllib3": {
"hashes": [
"sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c",
"sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098"
"sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
"sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
],
"index": "pypi",
"version": "==1.26.5"
"version": "==1.26.6"
}
},
"develop": {
@ -69,10 +70,10 @@
},
"astroid": {
"hashes": [
"sha256:3c9a2d84354185d13213ff2640ec03d39168dbcd13648abc84fb13ca3b2e2761",
"sha256:d66a600e1602736a0f24f725a511b0e50d12eb18f54b31ec276d2c26a0a62c6a"
"sha256:3975a0bd5373bdce166e60c851cfcbaf21ee96de80ec518c1f4cb3e94c3fb334",
"sha256:ab7f36e8a78b8e54a62028ba6beef7561db4cdb6f2a5009ecc44a6f42b5697ef"
],
"version": "==2.5.7"
"version": "==2.6.6"
},
"babel": {
"hashes": [
@ -88,12 +89,13 @@
],
"version": "==2021.5.30"
},
"chardet": {
"charset-normalizer": {
"hashes": [
"sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa",
"sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"
"sha256:0c8911edd15d19223366a194a513099a302055a962bca2cec0f54b8b63175d8b",
"sha256:f23667ebe1084be45f6ae0538e4a5a865206544097e4e8bbcacf42cd02a348f3"
],
"version": "==4.0.0"
"markers": "python_version >= '3'",
"version": "==2.0.4"
},
"docutils": {
"hashes": [
@ -110,10 +112,11 @@
},
"idna": {
"hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
"sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a",
"sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"
],
"version": "==2.10"
"markers": "python_version >= '3'",
"version": "==3.2"
},
"imagesize": {
"hashes": [
@ -124,10 +127,10 @@
},
"isort": {
"hashes": [
"sha256:0a943902919f65c5684ac4e0154b1ad4fac6dcaa5d9f3426b732f1c8b5419be6",
"sha256:2bb1680aad211e3c9944dbce1d4ba09a989f04e238296c87fe2139faa26d655d"
"sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899",
"sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2"
],
"version": "==5.8.0"
"version": "==5.9.3"
},
"jinja2": {
"hashes": [
@ -211,31 +214,32 @@
},
"mypy": {
"hashes": [
"sha256:0d0a87c0e7e3a9becdfbe936c981d32e5ee0ccda3e0f07e1ef2c3d1a817cf73e",
"sha256:25adde9b862f8f9aac9d2d11971f226bd4c8fbaa89fb76bdadb267ef22d10064",
"sha256:28fb5479c494b1bab244620685e2eb3c3f988d71fd5d64cc753195e8ed53df7c",
"sha256:2f9b3407c58347a452fc0736861593e105139b905cca7d097e413453a1d650b4",
"sha256:33f159443db0829d16f0a8d83d94df3109bb6dd801975fe86bacb9bf71628e97",
"sha256:3f2aca7f68580dc2508289c729bd49ee929a436208d2b2b6aab15745a70a57df",
"sha256:499c798053cdebcaa916eef8cd733e5584b5909f789de856b482cd7d069bdad8",
"sha256:4eec37370483331d13514c3f55f446fc5248d6373e7029a29ecb7b7494851e7a",
"sha256:552a815579aa1e995f39fd05dde6cd378e191b063f031f2acfe73ce9fb7f9e56",
"sha256:5873888fff1c7cf5b71efbe80e0e73153fe9212fafdf8e44adfe4c20ec9f82d7",
"sha256:61a3d5b97955422964be6b3baf05ff2ce7f26f52c85dd88db11d5e03e146a3a6",
"sha256:674e822aa665b9fd75130c6c5f5ed9564a38c6cea6a6432ce47eafb68ee578c5",
"sha256:7ce3175801d0ae5fdfa79b4f0cfed08807af4d075b402b7e294e6aa72af9aa2a",
"sha256:9743c91088d396c1a5a3c9978354b61b0382b4e3c440ce83cf77994a43e8c521",
"sha256:9f94aac67a2045ec719ffe6111df543bac7874cee01f41928f6969756e030564",
"sha256:a26f8ec704e5a7423c8824d425086705e381b4f1dfdef6e3a1edab7ba174ec49",
"sha256:abf7e0c3cf117c44d9285cc6128856106183938c68fd4944763003decdcfeb66",
"sha256:b09669bcda124e83708f34a94606e01b614fa71931d356c1f1a5297ba11f110a",
"sha256:cd07039aa5df222037005b08fbbfd69b3ab0b0bd7a07d7906de75ae52c4e3119",
"sha256:d23e0ea196702d918b60c8288561e722bf437d82cb7ef2edcd98cfa38905d506",
"sha256:d65cc1df038ef55a99e617431f0553cd77763869eebdf9042403e16089fe746c",
"sha256:d7da2e1d5f558c37d6e8c1246f1aec1e7349e4913d8fb3cb289a35de573fe2eb"
"sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9",
"sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a",
"sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9",
"sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e",
"sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2",
"sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212",
"sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b",
"sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885",
"sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150",
"sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703",
"sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072",
"sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457",
"sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e",
"sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0",
"sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb",
"sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97",
"sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8",
"sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811",
"sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6",
"sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de",
"sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504",
"sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921",
"sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d"
],
"index": "pypi",
"version": "==0.812"
"version": "==0.910"
},
"mypy-extensions": {
"hashes": [
@ -246,17 +250,17 @@
},
"packaging": {
"hashes": [
"sha256:5b327ac1320dc863dca72f4514ecc086f31186744b84a230374cc1fd776feae5",
"sha256:67714da7f7bc052e064859c05c595155bd1ee9f69f76557e21f051443c20947a"
"sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7",
"sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14"
],
"version": "==20.9"
"version": "==21.0"
},
"pefile": {
"hashes": [
"sha256:a5d6e8305c6b210849b47a6174ddf9c452b2888340b8177874b862ba6c207645"
"sha256:ed79b2353daa58421459abf4d685953bde0adf9f6e188944f97ba9795f100246"
],
"index": "pypi",
"version": "==2019.4.18"
"version": "==2021.5.24"
},
"psutil": {
"hashes": [
@ -301,25 +305,31 @@
},
"pyinstaller": {
"hashes": [
"sha256:f5c0eeb2aa663cce9a5404292c0195011fa500a6501c873a466b2e8cad3c950c"
"sha256:30733baaf8971902286a0ddf77e5499ac5f7bf8e7c39163e83d4f8c696ef265e",
"sha256:4d848cd782ee0893d7ad9fe2bfe535206a79f0b6760cecc5f2add831258b9322",
"sha256:8f747b190e6ad30e2d2fd5da9a64636f61aac8c038c0b7f685efa92c782ea14f",
"sha256:aae456205c68355f9597411090576bb31b614e53976b4c102d072bbe5db8392a",
"sha256:c587da8f521a7ce1b9efb4e3d0117cd63c92dc6cedff24590aeef89372f53012",
"sha256:ecc2baadeeefd2b6fbf39d13c65d4aa603afdda1c6aaaebc4577ba72893fee9e",
"sha256:fed9f5e4802769a416a8f2ca171c6be961d1861cc05a0b71d20dfe05423137e9"
],
"index": "pypi",
"version": "==4.2"
"version": "==4.5.1"
},
"pyinstaller-hooks-contrib": {
"hashes": [
"sha256:27558072021857d89524c42136feaa2ffe4f003f1bdf0278f9b24f6902c1759c",
"sha256:892310e6363655838485ee748bf1c5e5cade7963686d9af8650ee218a3e0b031"
"sha256:57964f93eb69255c49159ffdf052aae893feed223b0f69773dfd010ca6c569d9",
"sha256:7f5d0689b30da3092149fc536a835a94045ac8c9f0e6dfb23ac171890f5ea8f2"
],
"version": "==2021.1"
"version": "==2021.2"
},
"pylint": {
"hashes": [
"sha256:0e21d3b80b96740909d77206d741aa3ce0b06b41be375d92e1f3244a274c1f8a",
"sha256:d09b0b07ba06bcdff463958f53f23df25e740ecd81895f7d2699ec04bbd8dc3b"
"sha256:2e1a0eb2e8ab41d6b5dbada87f066492bb1557b12b76c47c2ee8aa8a11186594",
"sha256:8b838c8983ee1904b2de66cce9d0b96649a91901350e956d78f289c3bc87b48e"
],
"index": "pypi",
"version": "==2.7.2"
"version": "==2.9.6"
},
"pyparsing": {
"hashes": [
@ -345,11 +355,11 @@
},
"requests": {
"hashes": [
"sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804",
"sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"
"sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24",
"sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"
],
"index": "pypi",
"version": "==2.25.1"
"version": "==2.26.0"
},
"snowballstemmer": {
"hashes": [
@ -448,24 +458,32 @@
"sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f",
"sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"
],
"index": "pypi",
"version": "==1.4.3"
},
"types-requests": {
"hashes": [
"sha256:a5a305b43ea57bf64d6731f89816946a405b591eff6de28d4c0fd58422cee779",
"sha256:e21541c0f55c066c491a639309159556dd8c5833e49fcde929c4c47bdb0002ee"
],
"index": "pypi",
"version": "==2.25.6"
},
"typing-extensions": {
"hashes": [
"sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918",
"sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c",
"sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f"
"sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497",
"sha256:50b6f157849174217d0656f99dc82fe932884fb250826c18350e159ec6cdf342",
"sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84"
],
"index": "pypi",
"version": "==3.7.4.3"
"version": "==3.10.0.0"
},
"urllib3": {
"hashes": [
"sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c",
"sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098"
"sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4",
"sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f"
],
"index": "pypi",
"version": "==1.26.5"
"version": "==1.26.6"
},
"wrapt": {
"hashes": [

View File

@ -239,6 +239,13 @@ How to Download
.. versionadded:: 4.8
.. option:: --sanitize-paths
Force sanitization of paths so that the resulting file and directory names
are valid on both Windows and Unix.
.. versionadded:: 4.9
.. option:: --resume-prefix prefix
For many targets, Instaloader is capable of resuming a previously-aborted
@ -251,7 +258,8 @@ How to Download
- Profile posts,
- Profile IGTV posts (:option:`--igtv`),
- Profile tagged posts (:option:`--tagged`),
- Saved posts (``:saved``).
- Saved posts (``:saved``),
- Hashtags.
This feature is enabled by default for targets where it is supported;
:option:`--resume-prefix` only changes the name of the iterator files.
@ -273,7 +281,7 @@ How to Download
.. option:: --user-agent USER_AGENT
User Agent to use for HTTP requests. By default, Instaloader pretends to be
Chrome/89 on Linux.
Chrome/92 on Linux.
.. option:: --max-connection-attempts N

View File

@ -1,7 +1,7 @@
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""
__version__ = '4.8.5'
__version__ = '4.9b3'
try:

View File

@ -383,6 +383,9 @@ def main():
g_how.add_argument('--resume-prefix', metavar='PREFIX',
help='Prefix for filenames that are used to save the information to resume an interrupted '
'download.')
g_how.add_argument('--sanitize-paths', action='store_true',
help='Sanitize paths so that the resulting file and directory names are valid on both '
'Windows and Unix.')
g_how.add_argument('--no-resume', action='store_true',
help='Do not resume a previously-aborted download iteration, and do not save such information '
'when interrupted.')
@ -463,7 +466,8 @@ def main():
slide=args.slide,
fatal_status_codes=args.abort_on,
iphone_support=not args.no_iphone,
title_pattern=args.title_pattern)
title_pattern=args.title_pattern,
sanitize_paths=args.sanitize_paths)
_main(loader,
args.profile,
username=args.login.lower() if args.login is not None else None,

View File

@ -22,6 +22,7 @@ from .exceptions import *
from .instaloadercontext import InstaloaderContext, RateController
from .lateststamps import LatestStamps
from .nodeiterator import NodeIterator, resumable_iteration
from .sectioniterator import SectionIterator
from .structures import (Hashtag, Highlight, JsonExportable, Post, PostLocation, Profile, Story, StoryItem,
load_structure_from_file, save_structure_to_file, PostSidecarNode, TitlePic)
@ -136,20 +137,38 @@ class _ArbitraryItemFormatter(string.Formatter):
class _PostPathFormatter(_ArbitraryItemFormatter):
RESERVED: set = {'CON', 'PRN', 'AUX', 'NUL',
'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'}
def __init__(self, item: Any, force_windows_path: bool = False):
super().__init__(item)
self.force_windows_path = force_windows_path
def get_value(self, key, args, kwargs):
ret = super().get_value(key, args, kwargs)
if not isinstance(ret, str):
return ret
return self.sanitize_path(ret)
return self.sanitize_path(ret, self.force_windows_path)
@staticmethod
def sanitize_path(ret: str) -> str:
def sanitize_path(ret: str, force_windows_path: bool = False) -> str:
"""Replaces '/' with similar looking Division Slash and some other illegal filename characters on Windows."""
ret = ret.replace('/', '\u2215')
if platform.system() == 'Windows':
if ret.startswith('.'):
ret = ret.replace('.', '\u2024', 1)
if force_windows_path or platform.system() == 'Windows':
ret = ret.replace(':', '\uff1a').replace('<', '\ufe64').replace('>', '\ufe65').replace('\"', '\uff02')
ret = ret.replace('\\', '\ufe68').replace('|', '\uff5c').replace('?', '\ufe16').replace('*', '\uff0a')
ret = ret.replace('\n', ' ').replace('\r', ' ')
root, ext = os.path.splitext(ret)
if root.upper() in _PostPathFormatter.RESERVED:
root += '_'
if ext == '.':
ext = '\u2024'
ret = root + ext
return ret
@ -182,6 +201,7 @@ class Instaloader:
:param slide: :option:`--slide`
:param fatal_status_codes: :option:`--abort-on`
:param iphone_support: not :option:`--no-iphone`
:param sanitize_paths: :option:`--sanitize-paths`
.. attribute:: context
@ -211,7 +231,8 @@ class Instaloader:
slide: Optional[str] = None,
fatal_status_codes: Optional[List[int]] = None,
iphone_support: bool = True,
title_pattern: Optional[str] = None):
title_pattern: Optional[str] = None,
sanitize_paths: bool = False):
self.context = InstaloaderContext(sleep, quiet, user_agent, max_connection_attempts,
request_timeout, rate_controller, fatal_status_codes,
@ -228,6 +249,7 @@ class Instaloader:
self.title_pattern = '{date_utc}_UTC_{typename}'
else:
self.title_pattern = '{target}_{date_utc}_UTC_{typename}'
self.sanitize_paths = sanitize_paths
self.download_pictures = download_pictures
self.download_videos = download_videos
self.download_video_thumbnails = download_video_thumbnails
@ -291,7 +313,8 @@ class Instaloader:
check_resume_bbd=self.check_resume_bbd,
slide=self.slide,
fatal_status_codes=self.context.fatal_status_codes,
iphone_support=self.context.iphone_support)
iphone_support=self.context.iphone_support,
sanitize_paths=self.sanitize_paths)
yield new_loader
self.context.error_log.extend(new_loader.context.error_log)
new_loader.context.error_log = [] # avoid double-printing of errors
@ -511,9 +534,10 @@ class Instaloader:
pic_bytes = http_response.content
ig_filename = url.split('/')[-1].split('?')[0]
pic_data = TitlePic(owner_profile, target, name_suffix, ig_filename, date_object)
dirname = _PostPathFormatter(pic_data).format(self.dirname_pattern, target=target)
filename_template = os.path.join(dirname,
_PostPathFormatter(pic_data).format(self.title_pattern, target=target))
dirname = _PostPathFormatter(pic_data, self.sanitize_paths).format(self.dirname_pattern, target=target)
filename_template = os.path.join(
dirname,
_PostPathFormatter(pic_data, self.sanitize_paths).format(self.title_pattern, target=target))
filename = self.__prepare_filename(filename_template, lambda: url) + ".jpg"
content_length = http_response.headers.get('Content-Length', None)
if os.path.isfile(filename) and (not self.context.is_logged_in or
@ -638,7 +662,7 @@ class Instaloader:
"""Format filename of a :class:`Post` or :class:`StoryItem` according to ``filename-pattern`` parameter.
.. versionadded:: 4.1"""
return _PostPathFormatter(item).format(self.filename_pattern, target=target)
return _PostPathFormatter(item, self.sanitize_paths).format(self.filename_pattern, target=target)
def download_post(self, post: Post, target: Union[str, Path]) -> bool:
"""
@ -670,7 +694,7 @@ class Instaloader:
return False
return True
dirname = _PostPathFormatter(post).format(self.dirname_pattern, target=target)
dirname = _PostPathFormatter(post, self.sanitize_paths).format(self.dirname_pattern, target=target)
filename_template = os.path.join(dirname, self.format_filename(post, target=target))
filename = self.__prepare_filename(filename_template, lambda: post.url)
@ -821,7 +845,7 @@ class Instaloader:
last_scraped = latest_stamps.get_last_story_timestamp(name)
scraped_timestamp = datetime.now().astimezone()
for item in user_story.get_items():
if latest_stamps is not None and item.date_utc.replace(tzinfo=timezone.utc) <= last_scraped:
if latest_stamps is not None and item.date_local <= last_scraped:
break
if storyitem_filter is not None and not storyitem_filter(item):
self.context.log("<{} skipped>".format(item), flush=True)
@ -851,7 +875,7 @@ class Instaloader:
return True
date_local = item.date_local
dirname = _PostPathFormatter(item).format(self.dirname_pattern, target=target)
dirname = _PostPathFormatter(item, self.sanitize_paths).format(self.dirname_pattern, target=target)
filename_template = os.path.join(dirname, self.format_filename(item, target=target))
filename = self.__prepare_filename(filename_template, lambda: item.url)
downloaded = False
@ -919,8 +943,9 @@ class Instaloader:
name = user_highlight.owner_username
highlight_target = (filename_target
if filename_target
else (Path(_PostPathFormatter.sanitize_path(name)) /
_PostPathFormatter.sanitize_path(user_highlight.title))) # type: Union[str, Path]
else (Path(_PostPathFormatter.sanitize_path(name, self.sanitize_paths)) /
_PostPathFormatter.sanitize_path(user_highlight.title,
self.sanitize_paths))) # type: Union[str, Path]
self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name))
self.download_highlight_cover(user_highlight, highlight_target)
totalcount = user_highlight.itemcount
@ -970,7 +995,7 @@ class Instaloader:
else total_count)
sanitized_target = target
if isinstance(target, str):
sanitized_target = _PostPathFormatter.sanitize_path(target)
sanitized_target = _PostPathFormatter.sanitize_path(target, self.sanitize_paths)
if takewhile is None:
takewhile = lambda _: True
with resumable_iteration(
@ -1097,18 +1122,12 @@ class Instaloader:
.. versionchanged:: 4.2.9
Require being logged in (as required by Instagram)
"""
has_next_page = True
end_cursor = None
while has_next_page:
if end_cursor:
params = {'__a': 1, 'max_id': end_cursor}
else:
params = {'__a': 1}
location_data = self.context.get_json('explore/locations/{0}/'.format(location),
params)['graphql']['location']['edge_location_to_media']
yield from (Post(self.context, edge['node']) for edge in location_data['edges'])
has_next_page = location_data['page_info']['has_next_page']
end_cursor = location_data['page_info']['end_cursor']
yield from SectionIterator(
self.context,
lambda d: d["native_location_data"]["recent"],
lambda m: Post.from_iphone_struct(self.context, m),
f"explore/locations/{location}/",
)
@_requires_login
def download_location(self, location: str,
@ -1157,8 +1176,8 @@ class Instaloader:
"""Get Posts associated with a #hashtag.
.. deprecated:: 4.4
Use :meth:`Hashtag.get_posts`."""
return Hashtag.from_name(self.context, hashtag).get_posts()
Use :meth:`Hashtag.get_posts_resumable`."""
return Hashtag.from_name(self.context, hashtag).get_posts_resumable()
def download_hashtag(self, hashtag: Union[Hashtag, str],
max_count: Optional[int] = None,
@ -1194,7 +1213,7 @@ class Instaloader:
self.download_hashtag_profilepic(hashtag)
if posts:
self.context.log("Retrieving pictures with hashtag #{}...".format(hashtag.name))
self.posts_download_loop(hashtag.get_all_posts(), target, fast_update, post_filter,
self.posts_download_loop(hashtag.get_posts_resumable(), target, fast_update, post_filter,
max_count=max_count)
if self.save_metadata:
json_filename = '{0}/{1}'.format(self.dirname_pattern.format(profile=target,
@ -1216,15 +1235,15 @@ class Instaloader:
posts_takewhile: Optional[Callable[[Post], bool]] = None
if latest_stamps is not None:
last_scraped = latest_stamps.get_last_tagged_timestamp(profile.username)
posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped
posts_takewhile = lambda p: p.date_local > last_scraped
tagged_posts = profile.get_tagged_posts()
self.posts_download_loop(tagged_posts,
target if target
else (Path(_PostPathFormatter.sanitize_path(profile.username)) /
_PostPathFormatter.sanitize_path(':tagged')),
else (Path(_PostPathFormatter.sanitize_path(profile.username, self.sanitize_paths)) /
_PostPathFormatter.sanitize_path(':tagged', self.sanitize_paths)),
fast_update, post_filter, takewhile=posts_takewhile)
if latest_stamps is not None and tagged_posts.first_item is not None:
latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local.astimezone())
latest_stamps.set_last_tagged_timestamp(profile.username, tagged_posts.first_item.date_local)
def download_igtv(self, profile: Profile, fast_update: bool = False,
post_filter: Optional[Callable[[Post], bool]] = None,
@ -1239,12 +1258,12 @@ class Instaloader:
posts_takewhile: Optional[Callable[[Post], bool]] = None
if latest_stamps is not None:
last_scraped = latest_stamps.get_last_igtv_timestamp(profile.username)
posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped
posts_takewhile = lambda p: p.date_local > last_scraped
igtv_posts = profile.get_igtv_posts()
self.posts_download_loop(igtv_posts, profile.username, fast_update, post_filter,
total_count=profile.igtvcount, owner_profile=profile, takewhile=posts_takewhile)
if latest_stamps is not None and igtv_posts.first_item is not None:
latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local.astimezone())
latest_stamps.set_last_igtv_timestamp(profile.username, igtv_posts.first_item.date_local)
def _get_id_filename(self, profile_name: str) -> str:
if ((format_string_contains_key(self.dirname_pattern, 'profile') or
@ -1437,14 +1456,14 @@ class Instaloader:
if latest_stamps is not None:
# pylint:disable=cell-var-from-loop
last_scraped = latest_stamps.get_last_post_timestamp(profile_name)
posts_takewhile = lambda p: p.date_utc.replace(tzinfo=timezone.utc) > last_scraped
posts_takewhile = lambda p: p.date_local > last_scraped
posts_to_download = profile.get_posts()
self.posts_download_loop(posts_to_download, profile_name, fast_update, post_filter,
total_count=profile.mediacount, owner_profile=profile,
takewhile=posts_takewhile)
if latest_stamps is not None and posts_to_download.first_item is not None:
latest_stamps.set_last_post_timestamp(profile_name,
posts_to_download.first_item.date_local.astimezone())
posts_to_download.first_item.date_local)
if stories and profiles:
with self.context.error_catcher("Download stories"):

View File

@ -33,7 +33,7 @@ def copy_session(session: requests.Session, request_timeout: Optional[float] = N
def default_user_agent() -> str:
return 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
'(KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
'(KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'
class InstaloaderContext:

View File

@ -0,0 +1,46 @@
from typing import Any, Callable, Dict, Iterator, Optional, TypeVar
from .instaloadercontext import InstaloaderContext
T = TypeVar('T')


class SectionIterator(Iterator[T]):
    """Iterator for the new 'sections'-style responses.

    A page (the JSON response after being passed through ``sections_extractor``) is read as a
    dict with a ``sections`` list, a ``more_available`` flag and, when more data is available,
    a ``next_max_id`` cursor. The items of a section are taken from
    ``section['layout_content']['medias'][i]['media']``.

    :param context: Object whose ``get_json(path, params=...)`` performs the request.
    :param sections_extractor: Maps the raw JSON response to the page dict described above.
    :param media_wrapper: Converts one ``media`` dict into the item that is yielded.
    :param query_path: Path that is passed to ``context.get_json()`` for every page.
    :param first_data: Already extracted first page. If omitted (or falsy), the first page is
       fetched right away in the constructor.

    .. versionadded:: 4.9"""

    def __init__(self,
                 # Quoted: the annotation is only needed by type checkers.
                 context: "InstaloaderContext",
                 sections_extractor: Callable[[Dict[str, Any]], Dict[str, Any]],
                 media_wrapper: Callable[[Dict], T],
                 query_path: str,
                 first_data: Optional[Dict[str, Any]] = None):
        self._context = context
        self._sections_extractor = sections_extractor
        self._media_wrapper = media_wrapper
        self._query_path = query_path
        self._data = first_data or self._query()
        # Position of the next item: index into ``sections`` and index into that
        # section's ``medias`` list (attribute names kept for compatibility).
        self._page_index = 0
        self._section_index = 0

    def __iter__(self):
        return self

    def _query(self, max_id: Optional[str] = None) -> Dict[str, Any]:
        """Fetch one page, optionally continuing at cursor ``max_id``, and extract its sections."""
        pagination_variables = {"max_id": max_id} if max_id is not None else {}
        return self._sections_extractor(
            self._context.get_json(self._query_path, params={"__a": 1, **pagination_variables})
        )

    def __next__(self) -> T:
        """Return the next wrapped media item, fetching follow-up pages as needed.

        Sections without any media and pages without any section are skipped instead of
        raising :class:`IndexError` or recursing.

        :raises StopIteration: When the current page is exhausted and reports no more data."""
        while True:
            sections = self._data['sections']
            while self._page_index < len(sections):
                medias = sections[self._page_index]['layout_content']['medias']
                if self._section_index < len(medias):
                    media = medias[self._section_index]['media']
                    self._section_index += 1
                    return self._media_wrapper(media)
                # Current section is exhausted (or empty): advance to the next one.
                self._section_index = 0
                self._page_index += 1
            if not self._data['more_available']:
                raise StopIteration()
            # The query runs before any state is reset, so a failing request leaves
            # the iterator untouched.
            self._page_index, self._section_index, self._data = 0, 0, self._query(self._data["next_max_id"])

View File

@ -3,7 +3,9 @@ import lzma
import re
from base64 import b64decode, b64encode
from collections import namedtuple
from contextlib import suppress
from datetime import datetime
from itertools import islice
from pathlib import Path
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
from unicodedata import normalize
@ -12,6 +14,7 @@ from . import __version__
from .exceptions import *
from .instaloadercontext import InstaloaderContext
from .nodeiterator import FrozenNodeIterator, NodeIterator
from .sectioniterator import SectionIterator
PostSidecarNode = namedtuple('PostSidecarNode', ['is_video', 'display_url', 'video_url'])
PostSidecarNode.__doc__ = "Item of a Sidecar Post."
@ -90,6 +93,41 @@ class Post:
"""Create a post object from a given mediaid"""
return cls.from_shortcode(context, Post.mediaid_to_shortcode(mediaid))
@classmethod
def from_iphone_struct(cls, context: InstaloaderContext, media: Dict[str, Any]):
    """Create a post from a given iphone_struct.

    The struct is translated into a node dict using the key names the rest of the class
    works with; the untouched struct itself is stored under ``iphone_struct``. If the struct
    has a ``user`` entry, it is converted with :meth:`Profile.from_iphone_struct` and passed
    along as third constructor argument, otherwise ``None`` is passed.

    :param context: Context that is handed to the created objects.
    :param media: The iphone_struct of the post.
    :raises KeyError: If a mandatory key is missing or ``media_type`` is not 1, 2 or 8.

    .. versionadded:: 4.9"""
    typename_by_media_type = {
        1: "GraphImage",
        2: "GraphVideo",
        8: "GraphSidecar",
    }

    # Mandatory fields, filled in a fixed order.
    node: Dict[str, Any] = {}
    node["shortcode"] = media["code"]
    node["id"] = media["pk"]
    node["__typename"] = typename_by_media_type[media["media_type"]]
    node["is_video"] = typename_by_media_type[media["media_type"]] == "GraphVideo"
    node["date"] = media["taken_at"]
    caption_struct = media.get("caption")
    node["caption"] = None if caption_struct is None else media["caption"].get("text")
    node["title"] = media.get("title")
    node["viewer_has_liked"] = media["has_liked"]
    node["edge_media_preview_like"] = {"count": media["like_count"]}
    node["iphone_struct"] = media

    # Optional fields: each group is abandoned at the first missing key.
    with suppress(KeyError):
        node["display_url"] = media['image_versions2']['candidates'][0]['url']
    with suppress(KeyError):
        node["video_url"] = media['video_versions'][-1]['url']
        node["video_duration"] = media["video_duration"]
        node["video_view_count"] = media["view_count"]
    with suppress(KeyError):
        children = []
        for child in media["carousel_media"]:
            children.append({"node": {
                "display_url": child['image_versions2']['candidates'][0]['url'],
                "is_video": typename_by_media_type[child["media_type"]] == "GraphVideo",
            }})
        # Only set once every child could be converted.
        node["edge_sidecar_to_children"] = {"edges": children}

    owner = Profile.from_iphone_struct(context, media["user"]) if "user" in media else None
    return cls(context, node, owner)
@staticmethod
def shortcode_to_mediaid(code: str) -> int:
if len(code) > 11:
@ -225,17 +263,16 @@ class Post:
@property
def date_local(self) -> datetime:
"""Timestamp when the post was created (local time zone)."""
return datetime.fromtimestamp(self._node["date"]
if "date" in self._node
else self._node["taken_at_timestamp"])
"""Timestamp when the post was created (local time zone).
.. versionchanged:: 4.9
Return timezone aware datetime object."""
return datetime.fromtimestamp(self._get_timestamp_date_created()).astimezone()
@property
def date_utc(self) -> datetime:
"""Timestamp when the post was created (UTC)."""
return datetime.utcfromtimestamp(self._node["date"]
if "date" in self._node
else self._node["taken_at_timestamp"])
return datetime.utcfromtimestamp(self._get_timestamp_date_created())
@property
def date(self) -> datetime:
@ -276,6 +313,12 @@ class Post:
return len(edges)
return 1
def _get_timestamp_date_created(self) -> float:
    """Return the creation timestamp stored in the node.

    The ``date`` entry is preferred; ``taken_at_timestamp`` is used if the node has no
    ``date`` key."""
    node = self._node
    if "date" in node:
        return node["date"]
    return node["taken_at_timestamp"]
def get_is_videos(self) -> List[bool]:
"""
Return a list containing the ``is_video`` property for each media in the post.
@ -360,6 +403,16 @@ class Post:
return (pcaption[:30] + u"\u2026") if len(pcaption) > 31 else pcaption
return _elliptify(self.caption) if self.caption else ''
@property
def accessibility_caption(self) -> Optional[str]:
    """Accessibility caption of the post, or ``None`` if looking it up raises :class:`KeyError`.

    .. versionadded:: 4.9"""
    with suppress(KeyError):
        return self._field("accessibility_caption")
    return None
@property
def tagged_users(self) -> List[str]:
"""List of all lowercased users that are tagged in the Post."""
@ -666,6 +719,20 @@ class Profile:
context.profile_id_cache[profile_id] = profile
return profile
@classmethod
def from_iphone_struct(cls, context: InstaloaderContext, media: Dict[str, Any]):
    """Create a profile from a given iphone_struct.

    :param context: Context that is handed to the created profile.
    :param media: The iphone_struct describing the user; it is also stored unchanged under
       the ``iphone_struct`` key of the created node.
    :raises KeyError: If one of the copied keys is missing in ``media``.

    .. versionadded:: 4.9"""
    # (key in the node, key in the iphone_struct)
    field_map = (
        ("id", "pk"),
        ("username", "username"),
        ("is_private", "is_private"),
        ("full_name", "full_name"),
        ("profile_pic_url_hd", "profile_pic_url"),
    )
    node: Dict[str, Any] = {node_key: media[struct_key] for node_key, struct_key in field_map}
    node["iphone_struct"] = media
    return cls(context, node)
@classmethod
def own_profile(cls, context: InstaloaderContext):
"""Return own profile if logged-in.
@ -1048,6 +1115,21 @@ class StoryItem:
def __hash__(self) -> int:
return hash(self.mediaid)
@classmethod
def from_mediaid(cls, context: InstaloaderContext, mediaid: int):
    """Create a StoryItem object from a given mediaid.

    The mediaid is converted to a shortcode, which is then looked up with a GraphQL query.

    :raises BadResponseException: If the response carries no ``shortcode_media``.

    .. versionadded:: 4.9
    """
    query_variables = {'shortcode': Post.mediaid_to_shortcode(mediaid)}
    response = context.graphql_query('2b0673e0dc4580674a88d426fe00ea90', query_variables)
    story_node = response['data']['shortcode_media']
    if story_node is not None:
        return cls(context, story_node)
    raise BadResponseException("Fetching StoryItem metadata failed.")
@property
def _iphone_struct(self) -> Dict[str, Any]:
if not self._context.iphone_support:
@ -1079,8 +1161,11 @@ class StoryItem:
@property
def date_local(self) -> datetime:
    """Timestamp when the StoryItem was created (local time zone).

    .. versionchanged:: 4.9
       Return timezone aware datetime object."""
    # astimezone() without arguments attaches the system local timezone to the
    # naive value produced by fromtimestamp().
    return datetime.fromtimestamp(self._node['taken_at_timestamp']).astimezone()
@property
def date_utc(self) -> datetime:
@ -1360,6 +1445,9 @@ class Hashtag:
L.download_post(post, target="#"+hashtag.name)
Also, this class implements == and is hashable.
.. versionchanged:: 4.9
Removed ``get_related_tags()`` and ``is_top_media_only`` as these features were removed from Instagram.
"""
def __init__(self, context: InstaloaderContext, node: Dict[str, Any]):
assert "name" in node
@ -1388,8 +1476,8 @@ class Hashtag:
return self._node["name"].lower()
def _query(self, params):
    """Fetch the hashtag's JSON from ``explore/tags/<name>/`` and return its payload.

    Two response layouts are accepted: when the response has a ``graphql``
    key, ``response["graphql"]["hashtag"]`` is returned; otherwise
    ``response["data"]`` is returned.

    :param params: Query parameters passed through to ``get_json``.
    :raises KeyError: If the response matches neither layout.
    """
    json_response = self._context.get_json("explore/tags/{0}/".format(self.name), params)
    if "graphql" in json_response:
        return json_response["graphql"]["hashtag"]
    return json_response["data"]
def _obtain_metadata(self):
if not self._has_full_metadata:
@ -1400,7 +1488,9 @@ class Hashtag:
json_node = self._node.copy()
# remove posts
json_node.pop("edge_hashtag_to_top_posts", None)
json_node.pop("top", None)
json_node.pop("edge_hashtag_to_media", None)
json_node.pop("recent", None)
return json_node
def __repr__(self):
@ -1436,30 +1526,33 @@ class Hashtag:
return self._metadata("profile_pic_url")
@property
def description(self) -> Optional[str]:
    """The ``description`` entry of the hashtag metadata; may be ``None``."""
    return self._metadata("description")
@property
def allow_following(self) -> bool:
    """The ``allow_following`` entry of the hashtag metadata, coerced to ``bool``."""
    # Coerce so that a non-boolean value (e.g. None or an int) still honours
    # the annotated return type.
    return bool(self._metadata("allow_following"))
@property
def is_following(self) -> bool:
    """The ``is_following`` entry of the hashtag metadata.

    When that lookup raises :class:`KeyError`, the ``following`` entry is
    read instead and coerced to ``bool``.
    """
    try:
        return self._metadata("is_following")
    except KeyError:
        return bool(self._metadata("following"))
def get_top_posts(self) -> Iterator[Post]:
    """Yields the top posts of the hashtag.

    Posts are first read from the ``edge_hashtag_to_top_posts`` edges of the
    metadata. If a :class:`KeyError` is raised while doing so, the posts are
    produced by a :class:`SectionIterator` over the ``top`` metadata entry
    instead, each item being turned into a post with
    :meth:`Post.from_iphone_struct`.
    """
    try:
        yield from (Post(self._context, edge["node"])
                    for edge in self._metadata("edge_hashtag_to_top_posts", "edges"))
    except KeyError:
        # The edge-based layout is not present; use the section-based data.
        yield from SectionIterator(
            self._context,
            lambda d: d["data"]["top"],
            lambda m: Post.from_iphone_struct(self._context, m),
            f"explore/tags/{self.name}/",
            self._metadata("top"),
        )
@property
def mediacount(self) -> int:
@ -1469,23 +1562,38 @@ class Hashtag:
The number of posts with a certain hashtag may differ from the number of posts that can actually be accessed, as
the hashtag count might include private posts
"""
return self._metadata("edge_hashtag_to_media", "count")
try:
return self._metadata("edge_hashtag_to_media", "count")
except KeyError:
return self._metadata("media_count")
def get_posts(self) -> Iterator[Post]:
    """Yields the recent posts associated with this hashtag.

    Posts are first read page by page from the ``edge_hashtag_to_media``
    connection. If a :class:`KeyError` is raised anywhere in that branch, the
    posts are produced by a :class:`SectionIterator` over the ``recent``
    metadata entry instead.

    .. deprecated:: 4.9
       Use :meth:`Hashtag.get_posts_resumable` as this method may return incorrect results (:issue:`1457`)"""
    try:
        # The results of these two lookups are discarded; a KeyError raised by
        # either one selects the fallback below.
        # NOTE(review): presumably they also make sure the connection's fields
        # are loaded before it is read - confirm against `_metadata`.
        self._metadata("edge_hashtag_to_media", "edges")
        self._metadata("edge_hashtag_to_media", "page_info")
        conn = self._metadata("edge_hashtag_to_media")
        yield from (Post(self._context, edge["node"]) for edge in conn["edges"])
        while conn["page_info"]["has_next_page"]:
            # Request the next page, continuing from the current end cursor.
            data = self._query({'__a': 1, 'max_id': conn["page_info"]["end_cursor"]})
            conn = data["edge_hashtag_to_media"]
            yield from (Post(self._context, edge["node"]) for edge in conn["edges"])
    except KeyError:
        # The edge-based layout is not present; use the section-based data.
        yield from SectionIterator(
            self._context,
            lambda d: d["data"]["recent"],
            lambda m: Post.from_iphone_struct(self._context, m),
            f"explore/tags/{self.name}/",
            self._metadata("recent"),
        )
def get_all_posts(self) -> Iterator[Post]:
"""Yields all posts, i.e. all most recent posts and the top posts, in almost-chronological order."""
sorted_top_posts = iter(sorted(self.get_top_posts(), key=lambda p: p.date_utc, reverse=True))
other_posts = self.get_posts()
sorted_top_posts = iter(sorted(islice(self.get_top_posts(), 9), key=lambda p: p.date_utc, reverse=True))
other_posts = self.get_posts_resumable()
next_top = next(sorted_top_posts, None)
next_other = next(other_posts, None)
while next_top is not None or next_other is not None:
@ -1511,6 +1619,20 @@ class Hashtag:
yield next_other
next_other = next(other_posts, None)
def get_posts_resumable(self) -> NodeIterator[Post]:
    """Get the recent posts of the hashtag in a resumable fashion.

    The returned iterator reads ``data.hashtag.edge_hashtag_to_media`` from
    each query response and wraps every node in a :class:`Post`.

    :rtype: NodeIterator[Post]

    .. versionadded:: 4.9"""
    context = self._context
    extract_connection = lambda d: d['data']['hashtag']['edge_hashtag_to_media']
    wrap_node = lambda n: Post(self._context, n)
    query_variables = {'tag_name': self.name}
    tag_page_url = f"https://www.instagram.com/explore/tags/{self.name}/"
    return NodeIterator(
        context,
        "9b498c08113f1e09617a1703c22b2f32",
        extract_connection,
        wrap_node,
        query_variables,
        tag_page_url,
    )
class TopSearchResults:
"""
@ -1714,6 +1836,7 @@ def load_structure_from_file(context: InstaloaderContext, filename: str) -> Json
if compressed:
fp = lzma.open(filename, 'rt')
else:
# pylint:disable=consider-using-with
fp = open(filename, 'rt')
json_structure = json.load(fp)
fp.close()