first commit

2026-05-25 01:12:43 +03:00
commit bfc22efe24
83 changed files with 8903 additions and 0 deletions
@@ -0,0 +1,17 @@
+import simplejson as json
+
+# Default path of the JSON configuration file. It is a relative path, so it is
+# resolved against the process's current working directory.
+CFG_PATH = "cfg/config.json"
+
+
+# load cfg and return it
+def load_config(cfg_path=CFG_PATH):
+    """Read the JSON configuration file and return its parsed contents.
+
+    :param cfg_path: Path of the file to read; defaults to ``CFG_PATH``.
+    :return: The object produced by ``json.load`` for the file.
+    """
+    with open(cfg_path, "r", encoding="utf-8") as source:
+        parsed = json.load(source)
+    return parsed
+
+
+def rewrite_config(obj, cfg_path=CFG_PATH):
+    """Serialize ``obj`` as 4-space-indented JSON and overwrite the config file.
+
+    The object is serialized *before* the file is opened for writing: opening
+    with mode ``"w"`` truncates the file immediately, so serializing afterwards
+    would leave an empty or partial config behind whenever ``obj`` turns out
+    not to be JSON-serializable.
+
+    :param obj: JSON-serializable object to store.
+    :param cfg_path: Path of the file to overwrite; defaults to ``CFG_PATH``.
+    """
+    serialized = json.dumps(obj, indent=4)
+
+    with open(cfg_path, "w", encoding="utf-8") as config_fp:
+        config_fp.write(serialized)
@@ -0,0 +1,85 @@
+import logging
+
+import httpx
+from decouple import config
+
+
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+
+# Base URL of the RAG API, read through decouple's ``config`` from the
+# RAG_API_URL setting. Trailing slashes are stripped so request paths can be
+# appended with a single "/".
+RAG_API_URL = config("RAG_API_URL", default="http://api:8080").rstrip("/")
+# Timeout used for every RAG API request: 120 s in general, with the
+# connection phase capped at 20 s.
+HTTP_TIMEOUT = httpx.Timeout(120.0, connect=20.0)
+
+
+def build_history_payload(messages: list) -> list[dict]:
+    """Turn the most recent chat messages into plain dicts for the RAG API.
+
+    Only the last six items of ``messages`` are used; their order is kept.
+
+    :param messages: Objects exposing ``role`` and ``content`` attributes.
+    :return: One ``{"role": ..., "content": ...}`` dict per kept message.
+    """
+    recent = messages[-6:]
+    return [{"role": item.role, "content": item.content} for item in recent]
+
+
+def _user_facing_rag_error(detail: str) -> str:
+    """Map a known RAG API error detail to the message shown to the user.
+
+    Details that match none of the known markers are returned unchanged.
+    """
+    if "No reliable law chunks" in detail:
+        return (
+            "Я не нашел в базе надежную норму по этому вопросу. "
+            "Попробуйте уточнить ситуацию и задать вопрос еще раз."
+        )
+    if "User was not found" in detail:
+        return (
+            "Профиль пользователя не найден в базе. "
+            "Нажмите /start и попробуйте еще раз."
+        )
+    if "Consultation was not found" in detail:
+        return (
+            "Не удалось найти выбранную консультацию. "
+            "Откройте историю заново или начните новую консультацию."
+        )
+    return detail
+
+
+async def ask_rag_answer(
+    *,
+    user_id: int,
+    question: str,
+    category: str | None,
+    region: str | None,
+    user_type: str | None,
+    consultation_id: int | None = None,
+    history: list | None = None,
+    top_k: int = 5,
+) -> dict:
+    """POST a question to the RAG API and return the decoded JSON response.
+
+    All keyword arguments are forwarded in the JSON payload under the same
+    names; ``history`` is first reduced by ``build_history_payload`` and
+    ``save_history`` is always sent as ``True``.
+
+    :param user_id: Sent as ``user_id``.
+    :param question: Sent as ``question``.
+    :param category: Sent as ``category``.
+    :param region: Sent as ``region``.
+    :param user_type: Sent as ``user_type``.
+    :param consultation_id: Sent as ``consultation_id``.
+    :param history: Message objects for ``build_history_payload``; ``None``
+        is treated as an empty history.
+    :param top_k: Sent as ``top_k``.
+    :return: The JSON body of a successful response.
+    :raises RuntimeError: With a user-facing message when the API answers
+        with an error status, cannot be reached, or returns a body that is
+        not valid JSON. The original exception is attached as ``__cause__``.
+    """
+    payload = {
+        "user_id": user_id,
+        "consultation_id": consultation_id,
+        "save_history": True,
+        "question": question,
+        "category": category,
+        "region": region,
+        "user_type": user_type,
+        "history": build_history_payload(history or []),
+        "top_k": top_k,
+    }
+
+    try:
+        async with httpx.AsyncClient(timeout=HTTP_TIMEOUT) as client:
+            response = await client.post(
+                f"{RAG_API_URL}/api/v1/rag/answer",
+                json=payload,
+            )
+            response.raise_for_status()
+            return response.json()
+    except httpx.HTTPStatusError as exc:
+        try:
+            body = exc.response.json()
+        except ValueError:
+            body = None
+
+        # Only a JSON object can carry a "detail" key; for anything else
+        # (invalid JSON, a list, a bare string) fall back to the raw text.
+        detail = body.get("detail", "") if isinstance(body, dict) else exc.response.text
+
+        # "detail" may be null, a number or a list of validation errors;
+        # normalise it so the substring checks below cannot raise TypeError.
+        if detail is None:
+            detail = ""
+        elif not isinstance(detail, str):
+            detail = str(detail)
+
+        detail = _user_facing_rag_error(detail)
+
+        logger.warning("RAG API returned %s: %s", exc.response.status_code, detail)
+        raise RuntimeError(detail or "Сервис анализа вернул ошибку.") from exc
+    except httpx.HTTPError as exc:
+        logger.exception("RAG API request failed")
+        raise RuntimeError("Не удалось связаться с сервисом анализа. Попробуйте позже.") from exc
+    except ValueError as exc:
+        # A successful status with a body that is not valid JSON: surface it
+        # through the same RuntimeError contract as the other failures.
+        logger.exception("RAG API returned a response that is not valid JSON")
+        raise RuntimeError("Сервис анализа вернул ошибку.") from exc
@@ -0,0 +1,120 @@
+import html
+import re
+
+
+def to_html(obj):
+    """Return ``str(obj)`` with HTML special characters escaped."""
+    as_text = str(obj)
+    return html.escape(as_text)
+
+
+def format_llm_answer_html(text: str | None) -> str:
+    """Escape an LLM answer and apply light Markdown-style formatting as HTML.
+
+    Steps, in order: CRLF line endings become LF and the text is stripped,
+    HTML special characters are escaped, a leading ``-`` or ``*`` list marker
+    on each line becomes a bullet, and ``**...**`` spans become ``<b>`` tags.
+
+    :param text: Raw answer text; ``None`` yields an empty string.
+    :return: The formatted, HTML-escaped text.
+    """
+    if text is None:
+        return ""
+
+    list_marker = re.compile(r"^\s*[-*]\s+")
+    bold_span = re.compile(r"\*\*(.+?)\*\*")
+
+    unix_text = str(text).replace("\r\n", "\n").strip()
+    safe_text = html.escape(unix_text)
+
+    # List markers are rewritten per line so "^" anchors at each line start.
+    bulleted = "\n".join(
+        list_marker.sub("• ", row.rstrip()) for row in safe_text.split("\n")
+    )
+    return bold_span.sub(r"<b>\1</b>", bulleted)
+
+
+def _split_oversized_paragraph(paragraph: str, limit: int) -> tuple[list[str], str]:
+    """Break a paragraph longer than ``limit`` into line-based pieces.
+
+    Returns the finished pieces plus the trailing remainder, which the caller
+    may still merge with the following paragraph.
+    """
+    finished: list[str] = []
+    buffer = ""
+
+    for line in paragraph.split("\n"):
+        line = line.rstrip()
+        joined = line if not buffer else f"{buffer}\n{line}"
+        if len(joined) <= limit:
+            buffer = joined
+            continue
+
+        if buffer:
+            finished.append(buffer)
+
+        # A single line longer than the limit is cut at fixed offsets.
+        while len(line) > limit:
+            piece = line[:limit].rstrip()
+            # A slice made only of whitespace would become an empty chunk.
+            if piece:
+                finished.append(piece)
+            line = line[limit:].lstrip()
+
+        buffer = line
+
+    return finished, buffer
+
+
+def split_plain_text_chunks(text: str | None, limit: int = 3500) -> list[str]:
+    """Split text into chunks of at most ``limit`` characters.
+
+    Paragraphs (separated by blank lines) are packed together while they fit.
+    A paragraph that is too long on its own is split on line breaks, and a
+    line that is still too long is cut at fixed offsets.
+
+    :param text: Text to split; ``None`` or blank text yields ``[""]``.
+    :param limit: Maximum chunk length in characters; must be positive.
+    :return: Non-empty list of chunks in their original order.
+    :raises ValueError: If ``limit`` is not positive and there is text to
+        split (previously this case never terminated).
+    """
+    if text is None:
+        return [""]
+
+    normalized = str(text).replace("\r\n", "\n").strip()
+    if not normalized:
+        return [""]
+
+    # Checked after the empty-text shortcuts so those calls behave as before.
+    if limit <= 0:
+        raise ValueError("limit must be a positive integer")
+
+    chunks: list[str] = []
+    current = ""
+
+    for paragraph in normalized.split("\n\n"):
+        paragraph = paragraph.strip()
+        if not paragraph:
+            continue
+
+        candidate = paragraph if not current else f"{current}\n\n{paragraph}"
+        if len(candidate) <= limit:
+            current = candidate
+            continue
+
+        if current:
+            chunks.append(current)
+            current = ""
+
+        if len(paragraph) <= limit:
+            current = paragraph
+            continue
+
+        pieces, current = _split_oversized_paragraph(paragraph, limit)
+        chunks.extend(pieces)
+
+    if current:
+        chunks.append(current)
+
+    return chunks or [normalized[:limit]]
+
+
+def parse_links_to_inline_markup(message: str) -> list:
+    """
+    Parse a message with formatted links into rows of inline buttons.
+
+    Input format:
+    - [Button text + URL] for a single button.
+    - [Button1 + URL1][Button2 + URL2] for several buttons in one row.
+    - Each line of the message is a separate row of buttons.
+
+    Example:
+    [Button1 + https://example.com]
+    [Button2 + https://example.org][Button3 + https://example.net]
+
+    :param message: String containing the formatted links.
+    :return: List of button rows; each row is a list of (text, url) tuples.
+        Lines without any match produce no row.
+    """
+    # Matches one "[text + http(s)://url]" group; the text cannot contain "+".
+    link_re = re.compile(r"\[([^\[\]+]+)\s*\+\s*(https?://[^\[\]]+)\]")
+
+    keyboard_rows = []
+    for raw_line in message.strip().split("\n"):
+        buttons = [
+            (label.strip(), link.strip())
+            for label, link in link_re.findall(raw_line)
+        ]
+        if buttons:
+            keyboard_rows.append(buttons)
+
+    return keyboard_rows