From 80dde55f42c9ad19eb3ed2e8658ec46388c8b94e Mon Sep 17 00:00:00 2001 From: erjemin Date: Tue, 23 Dec 2025 17:19:42 +0300 Subject: [PATCH] =?UTF-8?q?mod:=20=D0=92=D0=B8=D1=81=D1=8F=D1=87=D0=B0?= =?UTF-8?q?=D1=8F=20=D0=BF=D1=83=D0=BD=D0=BA=D1=82=D1=83=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D1=8F=20=D0=B2=20=D0=BA=D0=BE=D0=BD=D0=B2=D0=B5=D0=B9=D1=80?= =?UTF-8?q?=D0=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- etpgrf/typograph.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/etpgrf/typograph.py b/etpgrf/typograph.py index f59eda3..5126eff 100644 --- a/etpgrf/typograph.py +++ b/etpgrf/typograph.py @@ -14,6 +14,7 @@ from etpgrf.quotes import QuotesProcessor from etpgrf.layout import LayoutProcessor from etpgrf.symbols import SymbolsProcessor from etpgrf.sanitizer import SanitizerProcessor +from etpgrf.hanging import HangingPunctuationProcessor from etpgrf.codec import decode_to_unicode, encode_from_unicode from etpgrf.config import PROTECTED_HTML_TAGS, SANITIZE_ALL_HTML @@ -34,6 +35,7 @@ class Typographer: layout: LayoutProcessor | bool | None = True, # Правила для тире и спецсимволов symbols: SymbolsProcessor | bool | None = True, # Правила для псевдографики sanitizer: SanitizerProcessor | str | bool | None = None, # Правила очистки + hanging_punctuation: str | bool | list[str] | None = None, # Висячая пунктуация # ... другие модули правил ... ): @@ -96,6 +98,11 @@ class Typographer: elif sanitizer: # Если передана строка режима или True self.sanitizer = SanitizerProcessor(mode=sanitizer) + # J. --- Конфигурация висячей пунктуации --- + self.hanging: HangingPunctuationProcessor | None = None + if hanging_punctuation: + self.hanging = HangingPunctuationProcessor(mode=hanging_punctuation) + # Z. 
--- Логирование инициализации --- logger.debug(f"Typographer `__init__`: langs: {self.langs}, mode: {self.mode}, " f"hyphenation: {self.hyphenation is not None}, " @@ -104,6 +111,7 @@ class Typographer: f"layout: {self.layout is not None}, " f"symbols: {self.symbols is not None}, " f"sanitizer: {self.sanitizer is not None}, " + f"hanging: {self.hanging is not None}, " f"process_html: {self.process_html}") @@ -216,6 +224,11 @@ class Typographer: # который применит все остальные правила к каждому текстовому узлу. self._walk_tree(soup) + # --- ЭТАП 4.5: Висячая пунктуация --- + # Применяем после всех текстовых преобразований, но перед финальной сборкой + if self.hanging: + self.hanging.process(soup) + # --- ЭТАП 5: Финальная сборка --- processed_html = str(soup) # BeautifulSoup по умолчанию экранирует амперсанды (& -> &amp;), которые мы сгенерировали