commit aff0bc29901a972dbf3343642da55589010791e6 Author: Dmitry Evdokimov Date: Tue May 12 23:37:04 2026 +0300 first commit diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..34543f0 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,4 @@ +venv +__pycache__ +*.pyc +.git diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..91ee406 --- /dev/null +++ b/.gitignore @@ -0,0 +1,59 @@ +# OS / editors +.DS_Store +Thumbs.db +.idea/ +.vscode/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +.python-version +.mypy_cache/ +.pytest_cache/ +.ruff_cache/ +.pyre/ +.tox/ +.nox/ +.coverage +.coverage.* +htmlcov/ + +# Virtualenvs +.venv/ +venv/ +env/ +ENV/ + +# Logs +*.log +logs/ +coursedoc/ +.codex-venv-docx/ +data/ +postgres.env + +# Secrets / env files +.env +.env.local +.env.*.local +bot/.env +menu_scraper/.env +rag_api/.env + +# Runtime data +data/chroma/ +data/menu/*.json +data/knowledge/ + +# Local databases / temp +*.sqlite3 +*.db +*.tmp +tmp/ + +# Build artifacts +build/ +dist/ +*.egg-info/ + diff --git a/README.md b/README.md new file mode 100644 index 0000000..875a0d7 --- /dev/null +++ b/README.md @@ -0,0 +1,199 @@ +# GorychBot + +Набор сервисов для Telegram-бота шаурмечной "Горыч" с отдельным парсером меню и отдельным RAG API. + +Проект разделён на независимые части: + +- `tgbot` — основной Telegram-бот на `aiogram`. +- `menu_scraper` — сервис, который парсит меню с `https://gorych34.ru/`. +- `rag_api` — FastAPI-сервис с RAG, локальными эмбеддингами `sergeyzh/rubert-mini-frida` и function calling для подбора блюд. +- `redisdb` — Redis для бота. +- `postgredb` — PostgreSQL для бота. + +## Что делает проект + +1. `menu_scraper` забирает меню с сайта Горыча и сохраняет нормализованный JSON в `data/menu/gorich_menu.json`. +2. `rag_api` индексирует: + - знания о заведении: описание, контакты, режим, доставка, соцсети; + - меню: названия, описания, ингредиенты, цены, размеры, фото. +3. `rag_api` умеет: + - отвечать на вопросы о заведении через RAG; + - вызывать tool `find_menu_items` для подбора блюд по бюджету, категории и ингредиентам. +4. `tgbot` работает отдельно от RAG API и использует PostgreSQL + Redis. + +## Сервисы + +### `tgbot` + +Назначение: +- Telegram-бот на `aiogram`. +- Хранит данные в PostgreSQL. +- Использует Redis для FSM/storage. + +Файл env: +- `bot/.env` + +Зависимости: +- `redisdb` +- `postgredb` + +Стартовая команда: +```bash +python aiogram_run.py +``` + +### `menu_scraper` + +Назначение: +- Парсит меню с сайта Горыча. +- Берёт встроенный каталог товаров из JSON на странице `gorych34.ru`. +- Пишет результат в `data/menu/gorich_menu.json`. + +Файл env: +- `menu_scraper/.env` + +Порт: +- `8010` + +Основные endpoints: +- `GET /health` +- `POST /scrape` +- `GET /items` +- `GET /items/{item_id}` + +Пример ответа: +- один snapshot меню с `total_items` и массивом `items` + +### `rag_api` + +Назначение: +- Отдельный API для вопросов о заведении. +- RAG по сайту, доставке, контактам и соцсетям. +- Локальные эмбеддинги через `sergeyzh/rubert-mini-frida` на CPU. +- Function calling через OpenRouter для подбора блюд из меню. + +Файл env: +- `rag_api/.env` + +Порт: +- внешний `8001` +- внутренний контейнерный `8000` + +Основные endpoints: +- `GET /health` +- `POST /chat` +- `POST /admin/reindex` +- `GET /menu/search` + +`POST /chat` принимает: +```json +{ + "message": "Посоветуй что-нибудь острое из пиццы до 400 рублей", + "history": [] +} +``` + +`GET /menu/search` умеет: +- `query` +- `max_price` +- `category` +- `must_include` +- `must_not_include` +- `limit` + +Пример: +```bash +curl "http://localhost:8001/menu/search?query=острая%20пицца&max_price=450&category=пицца&limit=3" +``` + +## Структура данных + +### `data/menu/gorich_menu.json` + +Содержит: +- `item_id` +- `name` +- `category` +- `description` +- `ingredients` +- `price` +- `price_label` +- `size` +- `photo_url` +- `source_url` +- `scraped_at` + +### `data/chroma/` + +Локальная база ChromaDB для RAG: +- коллекция знаний о заведении; +- коллекция документов меню. + +## Настройка + +В проекте используются отдельные env-файлы по сервисам: + +- `bot/.env` +- `menu_scraper/.env` +- `rag_api/.env` + +Примеры: + +- `.env.example` +- `bot/.env.example` +- `menu_scraper/.env.example` +- `rag_api/.env.example` + +Минимум для запуска: + +1. Заполнить `bot/.env` +2. Заполнить `rag_api/.env` +3. При необходимости поправить `menu_scraper/.env` + +Важно: +- `OPENROUTER_API_KEY` нужен только для `rag_api`. +- Для OpenRouter лучше использовать модель, которая нормально работает с tools в вашем регионе. Сейчас в конфиге стоит `mistralai/mistral-medium-3-5`. + +## Запуск + +Поднять всё: + +```bash +docker compose up -d --build +``` + +Проверка сервисов: + +```bash +curl http://localhost:8010/health +curl http://localhost:8001/health +``` + +Пересобрать индекс RAG вручную: + +```bash +curl -X POST http://localhost:8001/admin/reindex +``` + +Перепарсить меню вручную: + +```bash +curl -X POST http://localhost:8010/scrape +``` + +## Что уже реализовано + +- отдельный парсер меню; +- сохранение меню в JSON; +- отдельный RAG API; +- ChromaDB; +- локальные эмбеддинги `rubert-mini-frida`; +- function calling для подбора блюд; +- подбор по семантике + лексическим признакам меню; +- Docker Compose для всех сервисов. + +## Что важно знать + +- `rag_api` сейчас не вшит напрямую в `tgbot`; это отдельный сервис с HTTP API. +- `rag_api` на первом старте может подниматься дольше из-за загрузки модели эмбеддингов. +- `data/` хранит runtime-данные и не должен коммититься в git. diff --git a/bot/.env.example b/bot/.env.example new file mode 100644 index 0000000..fbcb485 --- /dev/null +++ b/bot/.env.example @@ -0,0 +1,48 @@ +# ======================== +# Telegram +# ======================== +# Токен Telegram-бота от @BotFather +TOKEN=your_telegram_bot_token + +# Telegram user_id главного администратора +BASE_ADMIN=123456789 + +# ======================== +# Redis +# ======================== +# Для локального запуска можно оставить localhost. +# В Docker Compose это значение переопределяется на redis://redisdb:6379/0 +REDIS_URL=redis://127.0.0.1:6379/0 + +# ======================== +# PostgreSQL +# ======================== +# Для локального запуска можно оставить localhost. +# В Docker Compose POSTGRES_HOST переопределяется на postgredb +POSTGRES_DB=gorychbot +POSTGRES_USER=postgres +POSTGRES_PASSWORD=change_me +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 + +# ======================== +# App +# ======================== +# Таймзона для времени и дат внутри бота +TIMEZONE=Europe/Moscow + +# Прокси для Telegram Bot API. +# Формат: +# - socks5:ip:port +# - http:ip:port +# - socks5:ip:port:user:pass +# - http:ip:port:user:pass +BOT_PROXY= + +# URL сервиса ответов. +# Для локального запуска можно оставить localhost. +# В Docker Compose это значение переопределяется на http://rag_api:8000 +RAG_API_URL=http://127.0.0.1:8001 + +# Таймаут запроса к RAG API в секундах +RAG_API_TIMEOUT_SECONDS=60 diff --git a/bot/Dockerfile b/bot/Dockerfile new file mode 100644 index 0000000..422fb11 --- /dev/null +++ b/bot/Dockerfile @@ -0,0 +1,14 @@ +# базовый образ Python +FROM python:3.13-alpine + +# рабочая директория +WORKDIR /app + +# файл зависимостей +COPY bot/requirements.txt /app/ + +# устанавливаем зависимости +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +COPY ./bot /app diff --git a/bot/aiogram_run.py b/bot/aiogram_run.py new file mode 100644 index 0000000..3f6fb83 --- /dev/null +++ b/bot/aiogram_run.py @@ -0,0 +1,65 @@ +# Aiogram +from aiogram.types.bot_command_scope_all_private_chats import ( + BotCommandScopeAllPrivateChats, +) + +# Bot +from create_bot import bot, dp, start_command, orm + +# Entry +from handlers.start import start_router, types +from handlers.admin.main import admin_main_router + +# Client handlers +from handlers.client import client_main_router + +# Admin handlers +from handlers.admin.list_of_users import list_of_users_router +from handlers.admin.statistic import admin_statistic_router +from handlers.admin.management import admin_management_router +from handlers.admin.mailer import admin_mailer_router +from handlers.admin.settings import admin_settings_router +from handlers.admin.blacklist import admin_blacklist_router + +# middlewares +from middlewares.users_control import * +from middlewares.album import AlbumMiddleware + +# Another +from decouple import config +from uvloop import run + + +async def main(): + + await orm.proceed_schemas() + await bot.set_my_commands(start_command, scope=BotCommandScopeAllPrivateChats()) + await orm.create_admin(int(config("BASE_ADMIN")), "base_admin", "base_admin") + + dp.message.middleware(BlacklistMiddleware()) + dp.callback_query.middleware(BlacklistMiddleware()) + dp.message.middleware(AntiFloodMiddleware()) + dp.message.middleware(AlbumMiddleware()) + + # ENTRY POINTS + dp.include_routers(start_router, admin_main_router) + + # CLIENT + dp.include_routers(client_main_router) + + # ADMIN + dp.include_routers( + list_of_users_router, + admin_statistic_router, + admin_management_router, + admin_mailer_router, + admin_settings_router, + admin_blacklist_router, + ) + + # await bot.delete_webhook(drop_pending_updates = True) + await dp.start_polling(bot) + + +if __name__ == "__main__": + run(main()) diff --git a/bot/create_bot.py b/bot/create_bot.py new file mode 100644 index 0000000..279e8c9 --- /dev/null +++ b/bot/create_bot.py @@ -0,0 +1,41 @@ +# aiogram +from aiogram import Bot, Dispatcher +from aiogram.client.default import DefaultBotProperties +from aiogram.enums import ParseMode +from aiogram.fsm.storage.redis import RedisStorage, DefaultKeyBuilder, StorageKey +from aiogram.types import BotCommand + +# cfg +from decouple import config + +# db +from database.orm import ORM + +# utils +from utils.proxy import build_bot_session + +# another +import logging, pytz + + +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) +redis_url = config("REDIS_URL") +bot_session = build_bot_session() +bot = Bot( + token=config("TOKEN"), + session=bot_session, + default=DefaultBotProperties( + parse_mode=ParseMode.HTML, + link_preview_is_disabled=True, + ), +) +storage = RedisStorage.from_url(redis_url) +storage.key_builder = DefaultKeyBuilder(with_bot_id=True) +dp = Dispatcher(storage=storage) + +start_command = [BotCommand(command="/start", description="🔄 Перезапустить бота")] +tz = pytz.timezone(config("TIMEZONE")) +orm = ORM() diff --git a/bot/database/__init__.py b/bot/database/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bot/database/db_models.py b/bot/database/db_models.py new file mode 100644 index 0000000..67ef245 --- /dev/null +++ b/bot/database/db_models.py @@ -0,0 +1,57 @@ +# sqlalchemy +from sqlalchemy.orm import declarative_base +from sqlalchemy import ( + Column, + Integer, + String, + BIGINT, + VARCHAR, + Boolean, + DateTime, + SmallInteger, + ARRAY, + DOUBLE_PRECISION, + Enum, +) +from sqlalchemy.dialects.postgresql import JSONB + +# types +from database.db_types import * + + +# init baseModel +BaseModel = declarative_base() + + +class User(BaseModel): + + __tablename__ = "users" + + user_id = Column(BIGINT, primary_key=True) + username = Column(VARCHAR(33), nullable=True) + fullname = Column(VARCHAR(128), nullable=False) + register_date = Column(DateTime(timezone=True), nullable=False) + + +class Admin(BaseModel): + + __tablename__ = "admins" + + user_id = Column(BIGINT, primary_key=True) + username = Column(VARCHAR(33), nullable=True) + fullname = Column(VARCHAR(128), nullable=False) + + +class Blacklist(BaseModel): + + __tablename__ = "blacklist" + + user_id = Column(BIGINT, primary_key=True) + + +class Setting(BaseModel): + + __tablename__ = "settings" + + name = Column(String, primary_key=True) + value = Column(JSONB, nullable=True) diff --git a/bot/database/db_types.py b/bot/database/db_types.py new file mode 100644 index 0000000..243bee4 --- /dev/null +++ b/bot/database/db_types.py @@ -0,0 +1,16 @@ +import enum + + +# class Type(enum.Enum): +# FIELD1 = "field1" +# FIELD2 = "field2" + +# @classmethod +# def from_string(cls, value: str): +# for item in cls: +# if item.value == value: +# return item +# raise ValueError(f"{value} is not a valid Type") + +# def __str__(self): +# return self.value diff --git a/bot/database/engine.py b/bot/database/engine.py new file mode 100644 index 0000000..6f59f86 --- /dev/null +++ b/bot/database/engine.py @@ -0,0 +1,18 @@ +# sqlalchemy imports +from sqlalchemy.engine import URL +from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession +from sqlalchemy.ext.asyncio import create_async_engine as _create_async_engine +from sqlalchemy.orm import sessionmaker + +# another +from typing import Union + + +def create_async_engine(url: Union[URL, str]) -> AsyncEngine: + + return _create_async_engine(url=url, pool_pre_ping=True, pool_recycle=3600) + + +def get_session_maker(engine: AsyncEngine) -> AsyncSession: + + return sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) diff --git a/bot/database/orm.py b/bot/database/orm.py new file mode 100644 index 0000000..937def5 --- /dev/null +++ b/bot/database/orm.py @@ -0,0 +1,229 @@ +# sqlalchemy import +from sqlalchemy import update, select, delete, func + +# Database engine +from database.engine import create_async_engine, get_session_maker + +# DB Models +from database.db_models import * + +# Config +from decouple import config + +# Another +from datetime import datetime +from typing import Any + + +class ORM: + + def __init__(self): + self.async_engine = create_async_engine( + url=f"postgresql+asyncpg://{config('POSTGRES_USER')}:{config('POSTGRES_PASSWORD')}@{config('POSTGRES_HOST')}:{config('POSTGRES_PORT')}/{config('POSTGRES_DB')}" + ) + self.session_maker = get_session_maker(self.async_engine) + + async def proceed_schemas(self) -> None: + async with self.async_engine.begin() as conn: + await conn.run_sync(BaseModel.metadata.create_all) + + # *############################ + # *# USERS # + # *############################ + + async def is_user_exists(self, user_id: int) -> bool: + async with self.session_maker() as session: + async with session.begin(): + query = await session.execute( + select(User.user_id).where(User.user_id == user_id) + ) + + return query.one_or_none() is not None + + async def create_user( + self, user_id: int, username: str, fullname: str, register_date: datetime + ) -> int: + async with self.session_maker() as session: + async with session.begin(): + if not await self.is_user_exists(user_id): + user = User( + user_id=user_id, + username=username, + fullname=fullname, + register_date=register_date, + ) + + session.add(user) + await session.flush() + return user.user_id + else: + return + + async def set_users_field( + self, user_id: int, field: str, value: int | str | bool + ) -> None: + async with self.session_maker() as session: + async with session.begin(): + await session.execute( + update(User) + .where(User.user_id == user_id) + .values({getattr(User, field): value}) + ) + + async def get_user(self, user_id: int) -> User: + async with self.session_maker() as session: + async with session.begin(): + query = await session.scalars( + select(User).where(User.user_id == user_id) + ) + + return query.one_or_none() + + async def get_all_users(self) -> list[User]: + async with self.session_maker() as session: + async with session.begin(): + query = await session.scalars(select(User)) + + return query.all() + + async def get_users_count(self) -> int: + async with self.session_maker() as session: + async with session.begin(): + query = await session.scalars(select(func.count()).select_from(User)) + + return query.one_or_none() + + async def get_all_user_ids(self) -> list[int]: + async with self.session_maker() as session: + async with session.begin(): + query = await session.scalars(select(User.user_id)) + + return query.all() + + # *############################ + # *# ADMINS # + # *############################ + + async def is_admin_exists(self, user_id: int) -> bool: + async with self.session_maker() as session: + async with session.begin(): + query = await session.execute( + select(Admin.user_id).where(Admin.user_id == user_id) + ) + + return query.one_or_none() is not None + + async def create_admin(self, user_id: int, username: str, fullname: str) -> None: + async with self.session_maker() as session: + async with session.begin(): + admin = Admin(user_id=user_id, username=username, fullname=fullname) + + await session.merge(admin) + + async def get_admin(self, user_id: int) -> Admin: + async with self.session_maker() as session: + async with session.begin(): + query = await session.scalars( + select(Admin).where(Admin.user_id == user_id) + ) + + return query.one_or_none() + + async def get_all_admins(self) -> list[Admin]: + async with self.session_maker() as session: + async with session.begin(): + query = await session.scalars(select(Admin)) + + return query.all() + + async def delete_admin(self, user_id: int) -> None: + async with self.session_maker() as session: + async with session.begin(): + await session.execute(delete(Admin).where(Admin.user_id == user_id)) + + async def set_admin_field( + self, user_id: int, field: str, value: int | str | bool + ) -> None: + async with self.session_maker() as session: + async with session.begin(): + await session.execute( + update(Admin) + .where(Admin.user_id == user_id) + .values({getattr(Admin, field): value}) + ) + + # *############################ + # *# SETTINGS # + # *############################ + + async def is_setting_exists(self, name: str) -> bool: + async with self.session_maker() as session: + async with session.begin(): + query = await session.execute( + select(Setting).where(Setting.name == name) + ) + + return query.one_or_none() is not None + + async def create_setting(self, name: str, value: Any) -> None: + async with self.session_maker() as session: + async with session.begin(): + setting = Setting(name=name, value=value) + + await session.merge(setting) + + async def init_settings(self) -> None: ... + + async def get_setting_value(self, name: str) -> Any: + async with self.session_maker() as session: + async with session.begin(): + query = await session.scalars( + select(Setting.value).where(Setting.name == name) + ) + + return query.one_or_none() + + async def update_setting_value(self, name: str, value: dict | list) -> None: + async with self.session_maker() as session: + async with session.begin(): + await session.execute( + update(Setting) + .where(Setting.name == name) + .values({getattr(Setting, "value"): value}) + ) + + # *############################ + # *# BLACKLIST # + # *############################ + + async def is_blacklisted(self, user_id: int) -> bool: + async with self.session_maker() as session: + async with session.begin(): + query = await session.execute( + select(Blacklist).where(Blacklist.user_id == user_id) + ) + + return query.one_or_none() is not None + + async def create_blacklist(self, user_id: int) -> None: + async with self.session_maker() as session: + async with session.begin(): + blacklist = Blacklist(user_id=user_id) + + await session.merge(blacklist) + + async def get_all_blacklist(self) -> list[int]: + async with self.session_maker() as session: + async with session.begin(): + query = await session.scalars( + select(Blacklist.user_id).order_by(Blacklist.user_id) + ) + + return query.all() + + async def delete_blacklist(self, user_id: int) -> None: + async with self.session_maker() as session: + async with session.begin(): + await session.execute( + delete(Blacklist).where(Blacklist.user_id == user_id) + ) diff --git a/bot/handlers/__init__.py b/bot/handlers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bot/handlers/admin/__init__.py b/bot/handlers/admin/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bot/handlers/admin/blacklist.py b/bot/handlers/admin/blacklist.py new file mode 100644 index 0000000..4e2da0e --- /dev/null +++ b/bot/handlers/admin/blacklist.py @@ -0,0 +1,221 @@ +# Aiogram +import aiogram.types as types +from aiogram.fsm.context import FSMContext +from aiogram.filters import StateFilter +from aiogram import Router, F +from aiogram.exceptions import TelegramBadRequest + +# Const +from create_bot import orm + +# Keyboards +from keyboards.admin.main_kbs import * + +# States +from states.admin_states import AdminStates, AdminBlacklistStates + +# Another +from contextlib import suppress + + +# Init +admin_blacklist_router = Router() + + +@admin_blacklist_router.message( + F.text == "🚫 Черный список", StateFilter(AdminStates.main) +) +@admin_blacklist_router.message(F.text == "↩️ Назад", StateFilter(AdminBlacklistStates)) +async def cmd_blacklist(message: types.Message, state: FSMContext): + + msg_text = "🚫 Выберите действие:" + + await message.answer(text=msg_text, reply_markup=get_blacklist_kb()) + + await state.set_state(AdminBlacklistStates.main) + + +# *############################ +# *# ADD # +# *############################ + + +@admin_blacklist_router.message( + F.text == "➕ Добавить", StateFilter(AdminBlacklistStates.main) +) +async def cmd_blacklist_add(message: types.Message, state: FSMContext): + + msg_text = f"➕ Введите User ID:" + + await message.answer(text=msg_text, reply_markup=get_back_kb()) + + await state.set_state(AdminBlacklistStates.add_blacklist) + + +@admin_blacklist_router.message(F.text, StateFilter(AdminBlacklistStates.add_blacklist)) +async def cmd_blacklist_add_finish(message: types.Message, state: FSMContext): + + # validation + if not message.text.isdigit(): + await message.answer( + text="⛔️ Только цифры! Повторите попытку:", reply_markup=get_back_kb() + ) + return + + user_id = int(message.text) + + if not await orm.is_user_exists(user_id): + await message.answer( + text="⛔️ Пользователь не существует в БД! Повторите попытку:", + reply_markup=get_back_kb(), + ) + return + + await orm.create_blacklist(user_id=user_id) + + await message.answer(text=f"✅ Черный список обновлен!") + await cmd_blacklist(message, state) + + +# *############################ +# *# DEL # +# *############################ + + +@admin_blacklist_router.message( + F.text == "➖ Удалить", StateFilter(AdminBlacklistStates.main) +) +async def cmd_blacklist_delete(message: types.Message, state: FSMContext): + + msg_text = "➖ Введите User ID:" + + await message.answer(text=msg_text, reply_markup=get_back_kb()) + + await state.set_state(AdminBlacklistStates.del_blacklist) + + +@admin_blacklist_router.message(F.text, StateFilter(AdminBlacklistStates.del_blacklist)) +async def cmd_blacklist_delete_finish(message: types.Message, state: FSMContext): + + # validation + if not message.text.isdigit(): + await message.answer( + text="⛔️ Только цифры! Повторите попытку:", reply_markup=get_back_kb() + ) + return + + user_id = int(message.text) + + if not await orm.is_blacklisted(user_id): + await message.answer( + text="⛔️ Пользователь не найден в ЧС! Повторите попытку:", + reply_markup=get_back_kb(), + ) + return + + await orm.delete_blacklist(user_id=user_id) + + await message.answer(text=f"✅ Черный список обновлен!") + + await cmd_blacklist(message, state) + + +# *############################ +# *# LIST # +# *############################ + + +@admin_blacklist_router.message( + F.text == "👁 Открыть список", StateFilter(AdminBlacklistStates.main) +) +async def cmd_blacklist_list(message: types.Message, state: FSMContext): + + await state.update_data(blacklist_offset=0) + items = await orm.get_all_blacklist() + + if not items: + await message.answer(text="💭 Список пуст.") + return + + offset = 0 + max_offset = len(items) // 10 + (1 if len(items) % 10 != 0 else 0) + + msg_text = f"🚫 Черный список {offset + 1}/{max_offset}\n\n" + + for item in items[offset * 10 : (offset + 1) * 10]: + msg_text += f"✦ {item}\n" + + await message.answer( + text=msg_text, + reply_markup=get_bookList_ikb( + prefix="admin_blacklist", + offset=0, + max_offset=max_offset, + items=[], + element_col=10, + ), + ) + + +async def cmd_blacklist_list_query(query: types.CallbackQuery, state: FSMContext): + + data = await state.get_data() + offset = data.get("blacklist_offset") + items = await orm.get_all_blacklist() + + if not items: + await query.answer(text="💭 Список пуст.") + return + + max_offset = len(items) // 10 + (1 if len(items) % 10 != 0 else 0) + + if offset < 0: + offset = max_offset - 1 + await state.update_data(blacklist_offset=offset) + elif offset >= max_offset: + offset = 0 + await state.update_data(blacklist_offset=offset) + + msg_text = f"🚫 Черный список {offset + 1}/{max_offset}\n\n" + + for item in items[offset * 10 : (offset + 1) * 10]: + msg_text += f"✦ {item}\n" + + with suppress(TelegramBadRequest): + await query.message.edit_text( + text=msg_text, + reply_markup=get_bookList_ikb( + prefix="admin_blacklist", + offset=offset, + max_offset=max_offset, + items=[], + element_col=10, + ), + ) + + await query.answer() + + +@admin_blacklist_router.callback_query( + F.data == "admin_blacklist_next", StateFilter(AdminBlacklistStates.main) +) +@admin_blacklist_router.callback_query( + F.data == "admin_blacklist_prev", StateFilter(AdminBlacklistStates.main) +) +@admin_blacklist_router.callback_query( + F.data == "admin_blacklist_status", StateFilter(AdminBlacklistStates.main) +) +async def cmd_blacklist_list_actions(query: types.CallbackQuery, state: FSMContext): + + state_data = await state.get_data() + + if query.data.endswith("next"): + await state.update_data( + blacklist_offset=state_data.get("blacklist_offset", 0) + 1 + ) + elif query.data.endswith("prev"): + await state.update_data( + blacklist_offset=state_data.get("blacklist_offset", 0) - 1 + ) + + await cmd_blacklist_list_query(query, state) diff --git a/bot/handlers/admin/list_of_users.py b/bot/handlers/admin/list_of_users.py new file mode 100644 index 0000000..0c16ed5 --- /dev/null +++ b/bot/handlers/admin/list_of_users.py @@ -0,0 +1,53 @@ +# Aiogram +import aiogram.types as types +from aiogram.fsm.context import FSMContext +from aiogram.filters import StateFilter +from aiogram import Router, F + +# Const +from create_bot import tz, orm + +# States +from states.admin_states import AdminStates + +# Another +import shutil, os +from openpyxl import load_workbook + + +# Init +list_of_users_router = Router() + + +@list_of_users_router.message( + F.text == "📑 Список пользователей", StateFilter(AdminStates.main) +) +async def cmd_list_of_users(message: types.Message, state: FSMContext): + + # copy the table + table_path = shutil.copy( + src="templates/users.xlsx", dst=f"templates/users_list.xlsx" + ) + + # load table + book = load_workbook(filename=table_path) + sheet = book["users"] + + all_clients = await orm.get_all_users() + + for row, user in enumerate(all_clients, 2): + sheet.cell(row=row, column=1, value=user.user_id) + sheet.cell(row=row, column=2, value=user.username) + sheet.cell(row=row, column=3, value=user.fullname) + sheet.cell( + row=row, + column=4, + value=user.register_date.astimezone(tz).strftime(r"%d-%m-%y %H:%M %Z"), + ) + + book.save(table_path) + + await message.answer_document(document=types.FSInputFile(table_path)) + + if os.path.exists(table_path): + os.remove(table_path) diff --git a/bot/handlers/admin/mailer.py b/bot/handlers/admin/mailer.py new file mode 100644 index 0000000..5fd6f4b --- /dev/null +++ b/bot/handlers/admin/mailer.py @@ -0,0 +1,129 @@ +# Aiogram imports +import aiogram.types as types +from aiogram.fsm.context import FSMContext +from aiogram.filters import StateFilter +from aiogram import Router, F + +# Const +from create_bot import bot, orm + +# Keyboards +from keyboards.admin.mailer_kbs import * + +# Utils +from utils.text_tools import parse_links_to_inline_markup + +# States +from states.admin_states import AdminStates, AdminMailerStates + +# Funcs +from handlers.admin.main import show_admin_menu + + +admin_mailer_router = Router() + + +@admin_mailer_router.message(F.text == "✉️ Рассылка", StateFilter(AdminStates.main)) +@admin_mailer_router.message(F.text == "↩️ Назад", StateFilter(AdminMailerStates)) +async def process_mailer_post(message: types.Message, state: FSMContext): + + msg_text = "✉️ Отправьте пост одним сообщением:" + + await message.answer(text=msg_text, reply_markup=get_back_to_main_kb()) + + await state.set_state(AdminMailerStates.post) + + +@admin_mailer_router.message(StateFilter(AdminMailerStates.post)) +async def process_mailer_ikb(message: types.Message, state: FSMContext): + + await state.update_data(admin_mailer_post=message.message_id) + + msg_text = """✉️ Введите кнопки: + +
Отправьте ссылку(и) в формате: +[Текст кнопки + ссылка] +Пример: +[Переводчик + https://t.me/TransioBot] + +Чтобы добавить несколько кнопок в один ряд, пишите ссылки рядом с предыдущими. +Формат: +[Первый текст + первая ссылка][Второй текст + вторая ссылка] + +Чтобы добавить несколько кнопок в строчку, пишите новые ссылки с новой строки. +Формат: +[Первый текст + первая ссылка] +[Второй текст + вторая ссылка]
""" + + await message.answer( + text=msg_text, reply_markup=get_skip_kb(), disable_web_page_preview=True + ) + + await state.set_state(AdminMailerStates.ikb) + + +@admin_mailer_router.message(F.text, StateFilter(AdminMailerStates.ikb)) +async def process_mailer_preview(message: types.Message, state: FSMContext): + + ikb = ( + parse_links_to_inline_markup(message.text) + if message.text != "↪️ Пропустить" + else None + ) + await state.update_data(admin_mailer_ikb=ikb) + + state_data = await state.get_data() + post = state_data.get("admin_mailer_post") + + await message.answer(text="✉️ Предпросмотр:", reply_markup=get_mailer_finish_kb()) + + try: + await bot.copy_message( + chat_id=message.from_user.id, + from_chat_id=message.from_user.id, + message_id=post, + reply_markup=get_mailer_btn_ikb(buttons_preset=ikb), + ) + except: + await message.answer(text="🔴 Ошибка!") + await process_mailer_post(message, state) + return + + await state.set_state(AdminMailerStates.preview) + + +@admin_mailer_router.message( + F.text == "🟢 Начать рассылку", StateFilter(AdminMailerStates.preview) +) +async def process_mailer_finish(message: types.Message, state: FSMContext): + + state_data = await state.get_data() + ikb = state_data.get("admin_mailer_ikb") + post = state_data.get("admin_mailer_post") + + all_users = await orm.get_all_user_ids() + + # info + await message.answer(text="▶️✉️ Рассылка запущена...") + + await state.clear() + + # back to main menu + await show_admin_menu(message, state) + + counter = 0 + for user_id in all_users: + try: + await bot.copy_message( + chat_id=user_id, + from_chat_id=message.from_user.id, + message_id=post, + reply_markup=get_mailer_btn_ikb(buttons_preset=ikb), + ) + counter += 1 + except: + pass + + await message.answer( + text=f"✅ Рассылка завершена! Сообщение отправлено {counter}/{len(all_users)}." + ) diff --git a/bot/handlers/admin/main.py b/bot/handlers/admin/main.py new file mode 100644 index 0000000..e626cc4 --- /dev/null +++ b/bot/handlers/admin/main.py @@ -0,0 +1,67 @@ +# Aiogram +import aiogram.types as types +from aiogram.fsm.context import FSMContext +from aiogram.filters import Command, StateFilter +from aiogram import Router, F + +# Const +from create_bot import orm + +# Keyboards +from keyboards.admin.main_kbs import * + +# States +from states.admin_states import ( + AdminStates, + AdminMailerStates, + AdminManagementStates, + AdminSettingsStates, + AdminBlacklistStates, +) + +# Funcs +from handlers.start import cmd_start + + +# Init +admin_main_router = Router() + + +@admin_main_router.message(Command("admin"), StateFilter("*")) +async def cmd_login_as_admin(message: types.Message, state: FSMContext): + + if message.chat.type != "private": + return + + is_admin_exists = await orm.is_admin_exists(user_id=message.from_user.id) + + if is_admin_exists: + await show_admin_menu(message, state) + else: + await message.answer(text="🤨") + + +@admin_main_router.message(F.text == "🔚 Выйти", StateFilter(AdminStates.main)) +async def cmd_admin_exit(message: types.Message, state: FSMContext): + + await message.answer(text="🚪⠀", reply_markup=types.ReplyKeyboardRemove()) + + await cmd_start(message, state) + + +@admin_main_router.message( + F.text == "↩️ Вернуться в меню", + StateFilter( + AdminManagementStates.main, + AdminMailerStates.post, + AdminSettingsStates.main, + AdminBlacklistStates.main, + ), +) +async def show_admin_menu(message: types.Message, state: FSMContext): + + msg_text = "👮‍♂️ Вы находитесь в админ-панели" + + await message.answer(text=msg_text, reply_markup=get_main_menu_kb()) + + await state.set_state(AdminStates.main) diff --git a/bot/handlers/admin/management.py b/bot/handlers/admin/management.py new file mode 100644 index 0000000..992f7fc --- /dev/null +++ b/bot/handlers/admin/management.py @@ -0,0 +1,142 @@ +# Aiogram imports +import aiogram.types as types +from aiogram.fsm.context import FSMContext +from aiogram.filters import StateFilter +from aiogram import Router, F +from aiogram.exceptions import TelegramBadRequest + +# Const +from create_bot import bot, storage, StorageKey, orm + +# Keyboards +from keyboards.admin.main_kbs import * + +# States +from states.admin_states import AdminStates, AdminManagementStates + +# Config +from decouple import config + +# Another +from contextlib import suppress + + +# Init +admin_management_router = Router() + + +@admin_management_router.message( + F.text == "👮‍♂️ Управление админами", StateFilter(AdminStates.main) +) +@admin_management_router.message( + F.text == "↩️ Назад", StateFilter(AdminManagementStates) +) +async def cmd_management(message: types.Message, state: FSMContext): + + admins = await orm.get_all_admins() + + msg_text = "👮‍♂️ Действующие администраторы\n" + + for admin in admins: + msg_text += f"✦ [{admin.user_id}]: {admin.username if admin.username else admin.fullname}\n" + + msg_text += f"\n🔽 Выберите действие:" + + await message.answer(text=msg_text, reply_markup=get_add_admins_kb()) + + await state.set_state(AdminManagementStates.main) + + +# *############################ +# *# ADD # +# *############################ + + +@admin_management_router.message( + F.text == "➕ Добавить", StateFilter(AdminManagementStates.main) +) +async def cmd_management_add_id(message: types.Message, state: FSMContext): + + msg_text = "➕ Введите User ID нового админа:" + + await message.answer(text=msg_text, reply_markup=get_back_kb()) + + await state.set_state(AdminManagementStates.add_admin) + + +@admin_management_router.message(F.text, StateFilter(AdminManagementStates.add_admin)) +async def cmd_management_add_finish(message: types.Message, state: FSMContext): + + # validation + if not message.text.isdigit(): + await message.answer( + text="⛔️ Только цифры! Повторите попытку:", reply_markup=get_back_kb() + ) + return + + user_id = int(message.text) + + if not await orm.is_user_exists(user_id): + await message.answer( + text="⛔️ Пользователь не существует в БД! Повторите попытку:", + reply_markup=get_back_kb(), + ) + return + + user = await orm.get_user(user_id) + await orm.create_admin(user.user_id, user.username, user.fullname) + await message.answer("✅ Успешно!") + await cmd_management(message, state) + + +# *############################ +# *# DELETE # +# *############################ + + +@admin_management_router.message( + F.text == "➖ Удалить", StateFilter(AdminManagementStates.main) +) +async def cmd_management_delete(message: types.Message, state: FSMContext): + + msg_text = "➖ Введите ID админа для удаления:" + + await message.answer(text=msg_text, reply_markup=get_back_kb()) + + await state.set_state(AdminManagementStates.del_admin) + + +@admin_management_router.message(F.text, StateFilter(AdminManagementStates.del_admin)) +async def cmd_management_delete_finish(message: types.Message, state: FSMContext): + + # validation + if not message.text.isdigit(): + await message.answer(text="⛔️ Только цифры! Повторите попытку:") + return + + user_id = int(message.text) + + if user_id == int(config("BASE_ADMIN")): + await message.answer( + text="⛔️ Отказано! Повторите попытку:", reply_markup=get_back_kb() + ) + return + + if not await orm.is_admin_exists(user_id): + await message.answer(text="⛔️ Админ не найден! Повторите попытку:") + return + + # change admin state + with suppress(TelegramBadRequest): + await bot.send_message( + chat_id=user_id, + text="☹️ Вы больше не являетесь админом!", + reply_markup=types.ReplyKeyboardRemove(), + ) + + await storage.set_state( + key=StorageKey(bot_id=bot.id, chat_id=user_id, user_id=user_id), state=None + ) + await orm.delete_admin(user_id) + await message.answer("✅ Успешно!") + await cmd_management(message, state) diff --git a/bot/handlers/admin/settings.py b/bot/handlers/admin/settings.py new file mode 100644 index 0000000..73497f6 --- /dev/null +++ b/bot/handlers/admin/settings.py @@ -0,0 +1,85 @@ +# Aiogram imports +import aiogram.types as types +from aiogram.fsm.context import FSMContext +from aiogram.filters import StateFilter +from aiogram import Router, F + +# Const +from create_bot import orm + +# Keyboards +from keyboards.admin.main_kbs import * + +# States +from states.admin_states import AdminStates, AdminSettingsStates + + +# Init +admin_settings_router = Router() + + +@admin_settings_router.message(F.text == "↩️ Назад", StateFilter(AdminSettingsStates)) +@admin_settings_router.message(F.text == "⚙️ Настройки", StateFilter(AdminStates.main)) +async def cmd_settings(message: types.Message, state: FSMContext): + + msg_text = "⚙️ Выберите, что хотите изменить:" + + await message.answer(text=msg_text, reply_markup=get_settings_kb()) + + await state.set_state(AdminSettingsStates.main) + + +# *############################ +# *# EDIT PHOTO # +# *############################ + + +@admin_settings_router.message( + F.text.in_({"🖼 ..."}), StateFilter(AdminSettingsStates.main) +) +async def cmd_edit_photo(message: types.Message, state: FSMContext): + + x = {"🖼 ...": "..."} + + setting_key = x.get(message.text) + await state.update_data(setting_key=setting_key) + photo = await orm.get_setting_value(setting_key) + + msg_text = f"""Текущее значение: +
{photo}
+ +⌨️ Отправьте фото для изменения:""" + + if photo: + await message.answer_photo( + photo=photo, caption=msg_text, reply_markup=get_back_kb() + ) + else: + await message.answer(text=msg_text, reply_markup=get_back_kb()) + + await state.set_state(AdminSettingsStates.edit_photo) + + +@admin_settings_router.message(F.photo, StateFilter(AdminSettingsStates.edit_photo)) +async def cmd_edit_photo_setup(message: types.Message, state: FSMContext): + + photo = message.photo[-1].file_id + + state_data = await state.get_data() + setting_key = state_data.get("setting_key") + + await orm.update_setting_value(setting_key, photo) + + msg_text = f"""Текущее значение: +
{photo}
+ +⌨️ Отправьте фото для изменения:""" + + if photo: + await message.answer_photo( + photo=photo, caption=msg_text, reply_markup=get_back_kb() + ) + else: + await message.answer(text=msg_text, reply_markup=get_back_kb()) + + await state.set_state(AdminSettingsStates.edit_photo) diff --git a/bot/handlers/admin/statistic.py b/bot/handlers/admin/statistic.py new file mode 100644 index 0000000..9b38b1b --- /dev/null +++ b/bot/handlers/admin/statistic.py @@ -0,0 +1,28 @@ +# Aiogram +import aiogram.types as types +from aiogram.fsm.context import FSMContext +from aiogram.filters import StateFilter +from aiogram import Router, F + +# Const +from create_bot import orm + +# States +from states.admin_states import AdminStates + +# Init +admin_statistic_router = Router() + + +@admin_statistic_router.message( + F.text == "📊 Статистика", StateFilter(AdminStates.main) +) +async def cmd_statistic(message: types.Message, state: FSMContext): + + users_count = await orm.get_users_count() + + msg_text = f"""📊 Статистика + +🔹 Кол-во пользователей в боте: {users_count:,} чел.""" + + await message.answer(text=msg_text) diff --git a/bot/handlers/client/__init__.py b/bot/handlers/client/__init__.py new file mode 100644 index 0000000..f4eda0f --- /dev/null +++ b/bot/handlers/client/__init__.py @@ -0,0 +1 @@ +from .main import client_main_router diff --git a/bot/handlers/client/main.py b/bot/handlers/client/main.py new file mode 100644 index 0000000..aacfb3b --- /dev/null +++ b/bot/handlers/client/main.py @@ -0,0 +1,154 @@ +# Aiogram +import aiogram.types as types +from aiogram import Router, F +from aiogram.filters import StateFilter +from aiogram.fsm.context import FSMContext +from aiogram.exceptions import TelegramBadRequest +from aiogram.types import BufferedInputFile + +import aiohttp + +# Keyboards +from keyboards.reply_keyboards import get_client_main_kb + +# Services +from services.rag_api import ask_rag_api, RagApiError + +# States +from states.client_states import MainStates + +# Utils +from utils.text_tools import format_telegram_html + + +client_main_router = Router() + +POPULAR_QUESTION_MAP = { + "🕒 До скольки вы работаете?": "До скольки вы работаете?", + "🚚 Как работает доставка?": "Есть ли доставка и как заказать?", + "🌯 Что посоветуете из шаурмы?": "Что посоветуете из шаурмы?", + "🍕 Подобрать пиццу до 400 ₽": "Подбери вкусную пиццу до 400 рублей", + "🧀 Что у вас есть с сыром?": "Что у вас есть с сыром?", + "🔥 Что у вас есть острое?": "Что у вас есть острое?", +} + +MAX_HISTORY_MESSAGES = 8 +MAX_MENU_CARDS = 3 +PHOTO_DOWNLOAD_TIMEOUT_SECONDS = 20 + + +def trim_history(history: list[dict[str, str]]) -> list[dict[str, str]]: + return history[-MAX_HISTORY_MESSAGES:] + + +def shorten_text(text: str, limit: int = 240) -> str: + cleaned = " ".join(str(text).split()) + if len(cleaned) <= limit: + return cleaned + return cleaned[: limit - 1].rstrip() + "…" + + +def build_menu_item_caption(item: dict[str, str]) -> str: + name = format_telegram_html(item.get("name", "Позиция из меню")) + raw_price = item.get("price_label") or "Цена уточняется" + if item.get("price") is None or "бесплат" in str(raw_price).lower(): + raw_price = "Цена уточняется" + price = format_telegram_html(raw_price) + description = format_telegram_html(shorten_text(item.get("description", ""))) + size = format_telegram_html(item.get("size") or "") + category = format_telegram_html(item.get("category") or "") + + caption_parts = [f"{name}", f"💸 {price}"] + if category or size: + meta = " • ".join(part for part in [category, size] if part) + caption_parts.append(meta) + if description: + caption_parts.append(description) + return "\n".join(caption_parts) + + +async def send_menu_cards(message: types.Message, items: list[dict[str, str]]) -> None: + for item in items[:MAX_MENU_CARDS]: + caption = build_menu_item_caption(item) + photo_url = item.get("photo_url") + + if photo_url: + try: + photo = await download_menu_photo(str(photo_url), str(item.get("item_id") or "menu")) + await message.answer_photo(photo=photo, caption=caption) + continue + except TelegramBadRequest: + pass + except Exception: + try: + await message.answer_photo(photo=photo_url, caption=caption) + continue + except TelegramBadRequest: + pass + + await message.answer(caption) + + +async def download_menu_photo(photo_url: str, item_id: str) -> BufferedInputFile: + timeout = aiohttp.ClientTimeout(total=PHOTO_DOWNLOAD_TIMEOUT_SECONDS) + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.get(photo_url) as response: + response.raise_for_status() + content = await response.read() + + extension = photo_url.rsplit(".", 1)[-1].split("?", 1)[0] if "." in photo_url else "jpg" + filename = f"menu_{item_id}.{extension or 'jpg'}" + return BufferedInputFile(content, filename=filename) + + +@client_main_router.message(F.text == "🧹 Очистить диалог", StateFilter(MainStates.main)) +async def clear_dialog(message: types.Message, state: FSMContext): + await state.update_data(rag_history=[]) + await message.answer( + "🧼 Диалог очищен. Можете задать новый вопрос о меню, доставке или заведении.", + reply_markup=get_client_main_kb(), + ) + + +@client_main_router.message(F.text, StateFilter(MainStates.main)) +async def handle_client_message(message: types.Message, state: FSMContext): + if message.chat.type != "private": + return + + if not message.text: + return + + user_text = POPULAR_QUESTION_MAP.get(message.text, message.text) + state_data = await state.get_data() + history = state_data.get("rag_history", []) + + waiting_message = await message.answer("🤖 Думаю над ответом...") + + try: + response = await ask_rag_api(message=user_text, history=history) + except RagApiError: + await waiting_message.edit_text( + "⚠️ Не получилось обратиться к сервису ответов. Попробуйте ещё раз через минуту." + ) + return + except Exception: + await waiting_message.edit_text( + "⚠️ Что-то пошло не так. Попробуйте отправить вопрос ещё раз." + ) + return + + answer = format_telegram_html(response.get("answer", "⚠️ Не удалось получить ответ.")) + updated_history = trim_history( + [ + *history, + {"role": "user", "content": user_text}, + {"role": "assistant", "content": answer}, + ] + ) + + await state.update_data(rag_history=updated_history) + await waiting_message.edit_text(answer) + + tool_results = response.get("tool_results") or [] + if tool_results: + await send_menu_cards(message, tool_results) diff --git a/bot/handlers/start.py b/bot/handlers/start.py new file mode 100644 index 0000000..c96633e --- /dev/null +++ b/bot/handlers/start.py @@ -0,0 +1,58 @@ +# Aiogram +import aiogram.types as types +from aiogram.fsm.context import FSMContext +from aiogram.filters import CommandStart, StateFilter +from aiogram import Router, F + +# Utils +from utils.text_tools import to_html + +# Const +from create_bot import orm + +# Keyboards +from keyboards.reply_keyboards import get_client_main_kb + +# States +from states.client_states import MainStates + +# Another +from datetime import datetime, timezone + + +# Init +start_router = Router() + + +@start_router.message(CommandStart(), StateFilter("*")) +async def cmd_start(message: types.Message, state: FSMContext): + + if message.chat.type != "private": + return + + user_id = message.from_user.id + username = ( + "@" + message.from_user.username + if message.from_user.username is not None + else None + ) + fullname = to_html(message.from_user.full_name) + + await orm.create_user( + user_id=user_id, + username=username, + fullname=fullname, + register_date=datetime.now(timezone.utc), + ) + + msg_text = ( + f"👋 Привет, {fullname}!\n\n" + "Я бот шаурмечной Горыч.\n" + "Подскажу по меню, доставке, режиму работы и помогу подобрать блюдо.\n\n" + "✨ Выберите популярный вопрос ниже или просто напишите свой." + ) + + await message.answer(text=msg_text, reply_markup=get_client_main_kb()) + + await state.update_data(rag_history=[]) + await state.set_state(MainStates.main) diff --git a/bot/keyboards/__init__.py b/bot/keyboards/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bot/keyboards/admin/mailer_kbs.py b/bot/keyboards/admin/mailer_kbs.py new file mode 100644 index 0000000..7f9bed4 --- /dev/null +++ b/bot/keyboards/admin/mailer_kbs.py @@ -0,0 +1,55 @@ +# Aiogram imports +from aiogram.utils.keyboard import ReplyKeyboardBuilder, InlineKeyboardBuilder +from aiogram.types import InlineKeyboardButton, KeyboardButton + + +def get_back_to_main_kb(): + + builder = ReplyKeyboardBuilder() + + builder.row(KeyboardButton(text="↩️ Вернуться в меню")) + + return builder.as_markup(resize_keyboard=True) + + +def get_back_kb(): + + builder = ReplyKeyboardBuilder() + + builder.row(KeyboardButton(text="↩️ Назад")) + + return builder.as_markup(resize_keyboard=True) + + +def get_skip_kb(): + + builder = ReplyKeyboardBuilder() + + builder.add(KeyboardButton(text="↪️ Пропустить"), KeyboardButton(text="↩️ Назад")) + builder.adjust(1) + + return builder.as_markup(resize_keyboard=True) + + +def get_mailer_finish_kb(): + + builder = ReplyKeyboardBuilder() + + builder.add( + KeyboardButton(text="🟢 Начать рассылку"), KeyboardButton(text="↩️ Назад") + ) + builder.adjust(1) + + return builder.as_markup(resize_keyboard=True, is_persistent=True) + + +def get_mailer_btn_ikb(buttons_preset: list[str] | None): + + builder = InlineKeyboardBuilder() + + if buttons_preset: + for row in buttons_preset: + for btn_name, btn_url in row: + builder.row(InlineKeyboardButton(text=btn_name, url=btn_url)) + + return builder.as_markup() diff --git a/bot/keyboards/admin/main_kbs.py b/bot/keyboards/admin/main_kbs.py new file mode 100644 index 0000000..5d99014 --- /dev/null +++ b/bot/keyboards/admin/main_kbs.py @@ -0,0 +1,95 @@ +# Aiogram imports +from aiogram.utils.keyboard import ( + ReplyKeyboardBuilder, + KeyboardButton, + InlineKeyboardBuilder, +) +from aiogram.types import ( + ReplyKeyboardMarkup, + InlineKeyboardMarkup, + InlineKeyboardButton, +) + + +def get_main_menu_kb(): + + builder = ReplyKeyboardBuilder() + + builder.row(KeyboardButton(text="📊 Статистика"), KeyboardButton(text="✉️ Рассылка")) + + builder.row( + KeyboardButton(text="🚫 Черный список"), KeyboardButton(text="⚙️ Настройки") + ) + + builder.row( + KeyboardButton(text="📑 Список пользователей"), + KeyboardButton(text="👮‍♂️ Управление админами"), + ) + + builder.row(KeyboardButton(text="🔚 Выйти")) + + return builder.as_markup(resize_keyboard=True, is_persistent=True) + + +def get_add_admins_kb(): + + builder = ReplyKeyboardBuilder() + + builder.row(KeyboardButton(text="➕ Добавить"), KeyboardButton(text="➖ Удалить")) + + builder.row(KeyboardButton(text="↩️ Вернуться в меню")) + + return builder.as_markup(resize_keyboard=True, is_persistent=True) + + +def get_back_kb(): + + builder = ReplyKeyboardBuilder() + + builder.row(KeyboardButton(text="↩️ Назад")) + + return builder.as_markup(resize_keyboard=True) + + +def get_settings_kb() -> ReplyKeyboardMarkup: + + builder = ReplyKeyboardBuilder() + + builder.add(KeyboardButton(text="↩️ Вернуться в меню")) + builder.adjust(2) + + return builder.as_markup(resize_keyboard=True, is_persistent=True) + + +def get_blacklist_kb(): + + builder = ReplyKeyboardBuilder() + + builder.row(KeyboardButton(text="👁 Открыть список")) + + builder.row(KeyboardButton(text="➕ Добавить"), KeyboardButton(text="➖ Удалить")) + + builder.row(KeyboardButton(text="↩️ Вернуться в меню")) + + return builder.as_markup(resize_keyboard=True, is_persistent=True) + + +def get_bookList_ikb( + prefix: str, offset: int, max_offset: int, items: list[tuple], element_col: int = 10 +) -> InlineKeyboardMarkup: + + builder = InlineKeyboardBuilder() + + for item_id, item_name in items[offset * element_col : (offset + 1) * element_col]: + builder.row( + InlineKeyboardButton( + text=f"{item_name}", callback_data=f"{prefix}_pick_{item_id}" + ) + ) + + builder.row( + InlineKeyboardButton(text="⬅️", callback_data=f"{prefix}_prev"), + InlineKeyboardButton(text="➡️", callback_data=f"{prefix}_next"), + ) + + return builder.as_markup() diff --git a/bot/keyboards/inline_keyboards.py b/bot/keyboards/inline_keyboards.py new file mode 100644 index 0000000..0175ad6 --- /dev/null +++ b/bot/keyboards/inline_keyboards.py @@ -0,0 +1,3 @@ +# Aiogram imports +from aiogram.utils.keyboard import InlineKeyboardBuilder +from aiogram.types import InlineKeyboardMarkup, InlineKeyboardButton diff --git a/bot/keyboards/reply_keyboards.py b/bot/keyboards/reply_keyboards.py new file mode 100644 index 0000000..0019260 --- /dev/null +++ b/bot/keyboards/reply_keyboards.py @@ -0,0 +1,28 @@ +# Aiogram imports +from aiogram.utils.keyboard import ReplyKeyboardBuilder +from aiogram.types import ReplyKeyboardMarkup, KeyboardButton + + +POPULAR_QUESTIONS = [ + "🕒 До скольки вы работаете?", + "🚚 Как работает доставка?", + "🌯 Что посоветуете из шаурмы?", + "🍕 Подобрать пиццу до 400 ₽", + "🧀 Что у вас есть с сыром?", + "🔥 Что у вас есть острое?", +] + + +def get_client_main_kb() -> ReplyKeyboardMarkup: + builder = ReplyKeyboardBuilder() + + for question in POPULAR_QUESTIONS: + builder.add(KeyboardButton(text=question)) + + builder.add(KeyboardButton(text="🧹 Очистить диалог")) + + builder.adjust(2, 2, 2, 1) + return builder.as_markup( + resize_keyboard=True, + input_field_placeholder="Спросите про меню, доставку или режим работы", + ) diff --git a/bot/middlewares/album.py b/bot/middlewares/album.py new file mode 100644 index 0000000..e316b71 --- /dev/null +++ b/bot/middlewares/album.py @@ -0,0 +1,60 @@ +import asyncio +from typing import Any, Dict, Union + +from aiogram import BaseMiddleware +from aiogram.types import Message + + +class AlbumMiddleware(BaseMiddleware): + def __init__(self, latency: Union[int, float] = 0.19): + # Initialize latency and album_data dictionary + self.latency = latency + self.album_data = {} + + # + def collect_album_messages(self, event: Message): + """ + Collect messages of the same media group. + """ + # # Check if media_group_id exists in album_data + if event.media_group_id not in self.album_data: + # # Create a new entry for the media group + self.album_data[event.media_group_id] = {"messages": []} + # + # # Append the new message to the media group + self.album_data[event.media_group_id]["messages"].append(event) + # + # # Return the total number of messages in the current media group + return len(self.album_data[event.media_group_id]["messages"]) + + # + async def __call__(self, handler, event: Message, data: Dict[str, Any]) -> Any: + """ + Main middleware logic. + """ + # # If the event has no media_group_id, pass it to the handler immediately + if not event.media_group_id: + return await handler(event, data) + # + # # Collect messages of the same media group + total_before = self.collect_album_messages(event) + # + # # Wait for a specified latency period + await asyncio.sleep(self.latency) + # + # # Check the total number of messages after the latency + total_after = len(self.album_data[event.media_group_id]["messages"]) + # + # # If new messages were added during the latency, exit + if total_before != total_after: + return + # + # # Sort the album messages by message_id and add to data + album_messages = self.album_data[event.media_group_id]["messages"] + album_messages.sort(key=lambda x: x.message_id) + data["album"] = album_messages + # + # # Remove the media group from tracking to free up memory + del self.album_data[event.media_group_id] + # # Call the original event handler + return await handler(event, data) diff --git a/bot/middlewares/users_control.py b/bot/middlewares/users_control.py new file mode 100644 index 0000000..8758ce4 --- /dev/null +++ b/bot/middlewares/users_control.py @@ -0,0 +1,93 @@ +from aiogram import types +from aiogram import BaseMiddleware +from datetime import datetime, timedelta, timezone +from collections import deque +import asyncio + +# Const +from create_bot import orm + + +class AntiFloodMiddleware(BaseMiddleware): + + def __init__( + self, max_messages: int = 5, interval: float = 2, block_time: float = 60.0 + ): + """ + Инициализация AntiFloodMiddleware. + + :param max_messages: Максимальное количество сообщений. + :param interval: Временной интервал (в секундах) для проверки сообщений. + :param block_time: Время блокировки пользователя (в секундах). + """ + super(AntiFloodMiddleware, self).__init__() + self.max_messages = max_messages + self.interval = interval + self.block_time = block_time + self.user_messages = {} # user_id: deque of message timestamps + self.blocked_users = {} # user_id: unblock_time + self.lock = asyncio.Lock() # Для обеспечения потокобезопасности + + async def __call__(self, handler, event: types.Message, data): + user_id = event.from_user.id + current_time = datetime.now(timezone.utc) + + async with self.lock: + # Проверка, заблокирован ли пользователь + if user_id in self.blocked_users: + unblock_time = self.blocked_users[user_id] + if current_time < unblock_time: + # Пользователь всё ещё заблокирован + return + else: + # Блокировка истекла + del self.blocked_users[user_id] + + if isinstance(event, types.CallbackQuery): + return await handler(event, data) + + # Инициализация очереди сообщений для пользователя, если её ещё нет + if user_id not in self.user_messages: + self.user_messages[user_id] = deque() + + user_queue = self.user_messages[user_id] + user_queue.append(current_time) + + # Удаление сообщений, которые старше интервала + while ( + user_queue + and (current_time - user_queue[0]).total_seconds() > self.interval + ): + user_queue.popleft() + + # Проверка, превысил ли пользователь лимит сообщений + if len(user_queue) > self.max_messages: + # Блокировка пользователя + self.blocked_users[user_id] = current_time + timedelta( + seconds=self.block_time + ) + # Очистка очереди сообщений + del self.user_messages[user_id] + + await event.answer(text="🧊 Вы заморожены на 1 минуту за флуд!") + + # Отмена обработки сообщения и блокировка + return + + return await handler(event, data) + + +class BlacklistMiddleware(BaseMiddleware): + def __init__(self): + super().__init__() + + async def __call__(self, handler, event: types.Update, data: dict): + user_id = self.get_user_id(event) + if user_id: + if await orm.is_blacklisted(user_id): + return + + return await handler(event, data) + + def get_user_id(self, event: types.Update): + return event.from_user.id diff --git a/bot/requirements.txt b/bot/requirements.txt new file mode 100644 index 0000000..3a217a5 --- /dev/null +++ b/bot/requirements.txt @@ -0,0 +1,39 @@ +aiofiles==24.1.0 +aiogram==3.17.0 +aiohappyeyeballs==2.4.6 +aiohttp==3.11.12 +aiohttp-socks==0.10.1 +aiosignal==1.3.2 +annotated-types==0.7.0 +asyncio==3.4.3 +asyncpg==0.30.0 +attrs==25.1.0 +certifi==2025.1.31 +charset-normalizer==3.4.1 +dotenv-cli==3.4.1 +et_xmlfile==2.0.0 +frozenlist==1.5.0 +greenlet==3.1.1 +idna==3.10 +magic-filter==1.0.12 +markdown-it-py==3.0.0 +mdurl==0.1.2 +multidict==6.1.0 +openpyxl==3.1.5 +propcache==0.2.1 +pydantic==2.10.6 +pydantic_core==2.27.2 +Pygments==2.19.1 +python-decouple==3.8 +redis==5.2.1 +requests==2.32.3 +rich==13.9.4 +simplejson==3.20.1 +SQLAlchemy==2.0.38 +typing_extensions==4.12.2 +urllib3==2.3.0 +uvloop==0.21.0 +yarl==1.18.3 +fastapi +uvicorn +pytz diff --git a/bot/services/__init__.py b/bot/services/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/bot/services/__init__.py @@ -0,0 +1 @@ + diff --git a/bot/services/rag_api.py b/bot/services/rag_api.py new file mode 100644 index 0000000..323bf1f --- /dev/null +++ b/bot/services/rag_api.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from typing import Any + +import aiohttp +from decouple import config + + +RAG_API_URL = config("RAG_API_URL", default="http://127.0.0.1:8001") +RAG_API_TIMEOUT_SECONDS = float(config("RAG_API_TIMEOUT_SECONDS", default="60")) + + +class RagApiError(Exception): + pass + + +async def ask_rag_api(message: str, history: list[dict[str, str]]) -> dict[str, Any]: + timeout = aiohttp.ClientTimeout(total=RAG_API_TIMEOUT_SECONDS) + payload = { + "message": message, + "history": history, + } + + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.post(f"{RAG_API_URL}/chat", json=payload) as response: + if response.status != 200: + text = await response.text() + raise RagApiError(f"RAG API returned {response.status}: {text}") + + return await response.json() + diff --git a/bot/states/__init__.py b/bot/states/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bot/states/admin_states.py b/bot/states/admin_states.py new file mode 100644 index 0000000..23ea749 --- /dev/null +++ b/bot/states/admin_states.py @@ -0,0 +1,36 @@ +# Aiogram imports +from aiogram.fsm.state import State, StatesGroup + + +class AdminStates(StatesGroup): + + main = State() + + +class AdminMailerStates(StatesGroup): + + post = State() + ikb = State() + preview = State() + + +class AdminManagementStates(StatesGroup): + + main = State() + + add_admin = State() + del_admin = State() + + +class AdminSettingsStates(StatesGroup): + + main = State() + edit_photo = State() + + +class AdminBlacklistStates(StatesGroup): + + main = State() + + add_blacklist = State() + del_blacklist = State() diff --git a/bot/states/client_states.py b/bot/states/client_states.py new file mode 100644 index 0000000..18879f4 --- /dev/null +++ b/bot/states/client_states.py @@ -0,0 +1,7 @@ +# Aiogram imports +from aiogram.fsm.state import State, StatesGroup + + +class MainStates(StatesGroup): + + main = State() diff --git a/bot/templates/users.xlsx b/bot/templates/users.xlsx new file mode 100644 index 0000000..0c7d879 Binary files /dev/null and b/bot/templates/users.xlsx differ diff --git a/bot/utils/__init__.py b/bot/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/bot/utils/cfg_loader.py b/bot/utils/cfg_loader.py new file mode 100644 index 0000000..1e5243a --- /dev/null +++ b/bot/utils/cfg_loader.py @@ -0,0 +1,17 @@ +import simplejson as json + +# init +CFG_PATH = "cfg/config.json" + + +# load cfg and return it +def load_config(cfg_path=CFG_PATH): + + with open(cfg_path, "r", encoding="utf-8") as config_fp: + return json.load(config_fp) + + +def rewrite_config(obj, cfg_path=CFG_PATH): + + with open(cfg_path, "w", encoding="utf-8") as config_fp: + json.dump(obj, config_fp, indent=4) diff --git a/bot/utils/proxy.py b/bot/utils/proxy.py new file mode 100644 index 0000000..8a8314b --- /dev/null +++ b/bot/utils/proxy.py @@ -0,0 +1,44 @@ +from __future__ import annotations + +from aiohttp import BasicAuth +from aiogram.client.session.aiohttp import AiohttpSession +from decouple import config + + +SUPPORTED_PROXY_PROTOCOLS = {"http", "socks5"} + + +def build_bot_session() -> AiohttpSession | None: + proxy_raw = config("BOT_PROXY", default="").strip() + if not proxy_raw: + return None + + proxy = parse_proxy_value(proxy_raw) + return AiohttpSession(proxy=proxy) + + +def parse_proxy_value(proxy_raw: str) -> str | tuple[str, BasicAuth]: + if "://" in proxy_raw: + return proxy_raw + + parts = proxy_raw.split(":") + if len(parts) not in {3, 5}: + raise ValueError( + "BOT_PROXY must be in format protocol:ip:port or protocol:ip:port:user:pass" + ) + + protocol, host, port = parts[:3] + protocol = protocol.lower() + + if protocol not in SUPPORTED_PROXY_PROTOCOLS: + raise ValueError( + f"Unsupported proxy protocol '{protocol}'. Supported: http, socks5" + ) + + proxy_url = f"{protocol}://{host}:{port}" + + if len(parts) == 3: + return proxy_url + + username, password = parts[3], parts[4] + return proxy_url, BasicAuth(login=username, password=password) diff --git a/bot/utils/text_tools.py b/bot/utils/text_tools.py new file mode 100644 index 0000000..108b125 --- /dev/null +++ b/bot/utils/text_tools.py @@ -0,0 +1,119 @@ +import html +import re +from html.parser import HTMLParser + + +def to_html(obj): + return str(obj).replace("<", "<").replace(">", ">") + + +class TelegramHTMLSanitizer(HTMLParser): + ALLOWED_TAGS = {"b", "i", "u", "s", "code", "pre", "a"} + TAG_ALIASES = {"strong": "b", "em": "i"} + ALLOWED_HREF_PREFIXES = ("http://", "https://", "tg://", "mailto:") + + def __init__(self) -> None: + super().__init__(convert_charrefs=False) + self.parts: list[str] = [] + self.tag_stack: list[str] = [] + + def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: + normalized_tag = self.TAG_ALIASES.get(tag, tag) + if normalized_tag not in self.ALLOWED_TAGS: + return + + if normalized_tag == "a": + href = next((value for key, value in attrs if key == "href" and value), None) + if not href or not href.startswith(self.ALLOWED_HREF_PREFIXES): + return + safe_href = html.escape(href, quote=True) + self.parts.append(f'') + self.tag_stack.append(normalized_tag) + return + + self.parts.append(f"<{normalized_tag}>") + self.tag_stack.append(normalized_tag) + + def handle_endtag(self, tag: str) -> None: + normalized_tag = self.TAG_ALIASES.get(tag, tag) + if normalized_tag not in self.ALLOWED_TAGS: + return + + for index in range(len(self.tag_stack) - 1, -1, -1): + if self.tag_stack[index] == normalized_tag: + del self.tag_stack[index] + self.parts.append(f"") + break + + def handle_data(self, data: str) -> None: + self.parts.append(html.escape(data, quote=False)) + + def handle_entityref(self, name: str) -> None: + self.parts.append(f"&{name};") + + def handle_charref(self, name: str) -> None: + self.parts.append(f"&#{name};") + + def get_html(self) -> str: + while self.tag_stack: + self.parts.append(f"") + return "".join(self.parts) + + +def markdown_to_telegram_html(text: str) -> str: + prepared = text.replace("\r\n", "\n").strip() + prepared = re.sub( + r"\[([^\]]+)\]\((https?://[^\s)]+)\)", + r'\1', + prepared, + ) + prepared = re.sub(r"\*\*(.+?)\*\*", r"\1", prepared, flags=re.DOTALL) + prepared = re.sub(r"__(.+?)__", r"\1", prepared, flags=re.DOTALL) + prepared = re.sub(r"(?m)^[ \t]*[*-]\s+", "• ", prepared) + return prepared + + +def format_telegram_html(text: str) -> str: + prepared = markdown_to_telegram_html(str(text)) + sanitizer = TelegramHTMLSanitizer() + sanitizer.feed(prepared) + sanitizer.close() + return sanitizer.get_html() + + +def parse_links_to_inline_markup(message: str) -> list: + """ + Парсит сообщение с форматированными ссылками и возвращает список рядов кнопок. + + Формат входного сообщения: + - [Текст кнопки + Ссылка] для одной кнопки. + - [Кнопка1 + Ссылка1][Кнопка2 + Ссылка2] для нескольких кнопок в одном ряду. + - Каждая строка представляет отдельный ряд кнопок. + + Пример: + [Кнопка1 + https://example.com] + [Кнопка2 + https://example.org][Кнопка3 + https://example.net] + + :param message: Строка с отформатированными ссылками. + :return: Список рядов кнопок, где каждый ряд — это список кортежей (Текст, Ссылка). + """ + # Исправленное регулярное выражение для поиска [Текст + Ссылка] + pattern = re.compile(r"\[([^\[\]+]+)\s*\+\s*(https?://[^\[\]]+)\]") + + # Инициализируем список рядов кнопок + keyboard_rows = [] + + # Разбиваем сообщение на строки + lines = message.strip().split("\n") + + for line in lines: + # Находим все совпадения в строке + matches = pattern.findall(line) + if matches: + row = [] + for text, url in matches: + button = (text.strip(), url.strip()) + row.append(button) + keyboard_rows.append(row) + + return keyboard_rows diff --git a/bot/webhooks.py b/bot/webhooks.py new file mode 100644 index 0000000..05f4eb1 --- /dev/null +++ b/bot/webhooks.py @@ -0,0 +1,16 @@ +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse + +app = FastAPI() + + +@app.get("/") +async def root(): + return {"message": "Hello, this is the test webhook endpoint!"} + + +@app.post("/webhook") +async def webhook(request: Request): + data = await request.json() + + return JSONResponse(content={"status": "ok", "data": data}) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..60eddd3 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,88 @@ +name: gorych-bot + +x-default-logging: &default-logging + logging: + driver: json-file + options: + max-size: "10m" + max-file: "3" + +services: + bot: + <<: *default-logging + build: + context: . + dockerfile: bot/Dockerfile + restart: unless-stopped + env_file: + - ./bot/.env + - ./postgres.env + environment: + - REDIS_URL=redis://redisdb:6379/0 + - RAG_API_URL=http://rag_api:8000 + depends_on: + redisdb: + condition: service_healthy + postgredb: + condition: service_healthy + rag_api: + condition: service_started + command: python aiogram_run.py + + menu_scraper: + <<: *default-logging + build: + context: . + dockerfile: menu_scraper/Dockerfile + restart: unless-stopped + env_file: + - ./menu_scraper/.env + - ./postgres.env + volumes: + - ./data:/data + ports: + - "8010:8010" + + rag_api: + <<: *default-logging + build: + context: . + dockerfile: rag_api/Dockerfile + restart: unless-stopped + env_file: + - ./rag_api/.env + - ./postgres.env + environment: + - ANONYMIZED_TELEMETRY=false + - HF_HOME=/data/huggingface + - TRANSFORMERS_CACHE=/data/huggingface + - HUGGINGFACE_HUB_CACHE=/data/huggingface + - HUGGINGFACE_CACHE_DIR=/data/huggingface + depends_on: + - menu_scraper + volumes: + - ./data:/data + ports: + - "8001:8000" + + redisdb: + <<: *default-logging + image: redis:6-alpine + restart: unless-stopped + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 5s + timeout: 5s + retries: 5 + + postgredb: + <<: *default-logging + image: postgres:16-alpine + restart: unless-stopped + env_file: + - ./postgres.env + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ruby -d postgres"] + interval: 5s + timeout: 5s + retries: 5 diff --git a/menu_scraper/.env.example b/menu_scraper/.env.example new file mode 100644 index 0000000..cd8cd84 --- /dev/null +++ b/menu_scraper/.env.example @@ -0,0 +1,4 @@ +GORICH_SITE_URL=https://gorych34.ru/ +MENU_OUTPUT_PATH=/data/menu/gorich_menu.json +REQUEST_TIMEOUT_SECONDS=20 +SCRAPE_ON_STARTUP=true diff --git a/menu_scraper/Dockerfile b/menu_scraper/Dockerfile new file mode 100644 index 0000000..9548a6e --- /dev/null +++ b/menu_scraper/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.12-slim + +WORKDIR /app + +COPY menu_scraper/requirements.txt /app/requirements.txt + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt + +COPY menu_scraper/app /app/app + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8010"] + diff --git a/menu_scraper/app/__init__.py b/menu_scraper/app/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/menu_scraper/app/__init__.py @@ -0,0 +1 @@ + diff --git a/menu_scraper/app/config.py b/menu_scraper/app/config.py new file mode 100644 index 0000000..0528d05 --- /dev/null +++ b/menu_scraper/app/config.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +import os +from dataclasses import dataclass + + +@dataclass(slots=True) +class Settings: + site_url: str = os.getenv("GORICH_SITE_URL", "https://gorych34.ru/") + output_path: str = os.getenv("MENU_OUTPUT_PATH", "/data/menu/gorich_menu.json") + request_timeout: float = float(os.getenv("REQUEST_TIMEOUT_SECONDS", "20")) + scrape_on_startup: bool = os.getenv("SCRAPE_ON_STARTUP", "true").lower() == "true" + + +settings = Settings() diff --git a/menu_scraper/app/main.py b/menu_scraper/app/main.py new file mode 100644 index 0000000..38eed82 --- /dev/null +++ b/menu_scraper/app/main.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import json +from contextlib import asynccontextmanager +from pathlib import Path + +from fastapi import FastAPI, HTTPException + +from .config import settings +from .models import MenuSnapshot +from .scraper import GorichMenuScraper + + +scraper = GorichMenuScraper() +output_path = Path(settings.output_path) + + +@asynccontextmanager +async def lifespan(_: FastAPI): + if settings.scrape_on_startup: + await scraper.scrape_and_save() + yield + + +app = FastAPI( + title="Gorich Menu Scraper", + version="1.0.0", + lifespan=lifespan, +) + + +def load_snapshot_from_disk() -> MenuSnapshot: + if not output_path.exists(): + raise HTTPException(status_code=404, detail="Menu snapshot not found") + + data = json.loads(output_path.read_text(encoding="utf-8")) + return MenuSnapshot.model_validate(data) + + +@app.get("/health") +async def health() -> dict[str, str]: + return {"status": "ok"} + + +@app.post("/scrape", response_model=MenuSnapshot) +async def scrape_menu() -> MenuSnapshot: + return await scraper.scrape_and_save() + + +@app.get("/items", response_model=MenuSnapshot) +async def get_items() -> MenuSnapshot: + return load_snapshot_from_disk() + + +@app.get("/items/{item_id}") +async def get_item(item_id: str) -> dict[str, object]: + snapshot = load_snapshot_from_disk() + for item in snapshot.items: + if item.item_id == item_id: + return item.model_dump(mode="json") + + raise HTTPException(status_code=404, detail="Menu item not found") + diff --git a/menu_scraper/app/models.py b/menu_scraper/app/models.py new file mode 100644 index 0000000..de24a77 --- /dev/null +++ b/menu_scraper/app/models.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from datetime import datetime +from typing import Any + +from pydantic import BaseModel, Field + + +class MenuItem(BaseModel): + item_id: str + name: str + category: str + description: str + ingredients: list[str] + price: int | None = None + price_label: str + size: str | None = None + photo_url: str + source_url: str + scraped_at: datetime + metadata: dict[str, Any] = Field(default_factory=dict) + + +class MenuSnapshot(BaseModel): + source_url: str + scraped_at: datetime + total_items: int + items: list[MenuItem] + diff --git a/menu_scraper/app/scraper.py b/menu_scraper/app/scraper.py new file mode 100644 index 0000000..0a3aad7 --- /dev/null +++ b/menu_scraper/app/scraper.py @@ -0,0 +1,309 @@ +from __future__ import annotations + +import json +import re +from datetime import datetime, timezone +from pathlib import Path +from urllib.parse import urljoin + +import httpx +from bs4 import BeautifulSoup + +from .config import settings +from .models import MenuItem, MenuSnapshot + + +SHOP_PAYLOAD_MARKERS = ( + "MsJsShop.init(", + "MsJsPublishedManager.addJsData(", +) +SIZE_PATTERN = re.compile( + r"(\d+\s*(?:см|г|мл)(?:\s*/\s*\d+\s*(?:см|г|мл))*)", + re.IGNORECASE, +) + + +def normalize_spaces(value: str) -> str: + return " ".join(value.replace("\xa0", " ").split()) + + +def compact_text(value: str) -> str: + return re.sub(r"\s+", "", value.replace("\xa0", " ")).lower() + + +def parse_price(price_label: str) -> int | None: + cleaned = normalize_spaces(price_label).lower() + if "бесплатно" in cleaned: + return None + + digits = re.sub(r"[^\d]", "", cleaned) + return int(digits) if digits else None + + +def parse_ingredients(description: str) -> list[str]: + cleaned = normalize_spaces(description) + if not cleaned: + return [] + + lower_cleaned = cleaned.lower() + if lower_cleaned.startswith("состав:"): + cleaned = cleaned.split(":", 1)[1].strip() + + return [part.strip() for part in cleaned.split(",") if part.strip()] + + +def extract_size(*values: str) -> str | None: + for value in values: + match = SIZE_PATTERN.search(value) + if match: + return match.group(1).replace(" ", "") + return None + + +def is_size_only_line(value: str) -> bool: + size = extract_size(value) + return size is not None and compact_text(value) == compact_text(size) + + +def extract_first_json_object(html: str, marker: str) -> dict[str, object]: + marker_index = html.find(marker) + if marker_index == -1: + raise ValueError(f"{marker} payload not found in page") + + object_start = html.find("{", marker_index) + if object_start == -1: + raise ValueError("Shop payload start not found") + + depth = 0 + in_string = False + escaped = False + object_end = None + + for index in range(object_start, len(html)): + char = html[index] + + if in_string: + if escaped: + escaped = False + elif char == "\\": + escaped = True + elif char == '"': + in_string = False + continue + + if char == '"': + in_string = True + elif char == "{": + depth += 1 + elif char == "}": + depth -= 1 + if depth == 0: + object_end = index + 1 + break + + if object_end is None: + raise ValueError("Shop payload end not found") + + return json.loads(html[object_start:object_end]) + + +def find_shop_container(payload: object) -> dict[str, object] | None: + if isinstance(payload, dict): + shop = payload.get("shop") + if isinstance(shop, dict) and isinstance(shop.get("products"), list): + return payload + + ds_shop = payload.get("dsShop") + if isinstance(ds_shop, dict) and isinstance(ds_shop.get("data"), list): + return { + "shop": { + "products": ds_shop.get("data", []), + "settings": ds_shop.get("settings", {}), + } + } + + for value in payload.values(): + found = find_shop_container(value) + if found: + return found + + if isinstance(payload, list): + for value in payload: + found = find_shop_container(value) + if found: + return found + + return None + + +def extract_shop_payload(html: str) -> dict[str, object]: + errors: list[str] = [] + for marker in SHOP_PAYLOAD_MARKERS: + try: + payload = extract_first_json_object(html, marker) + except ValueError as exc: + errors.append(str(exc)) + continue + + shop_container = find_shop_container(payload) + if shop_container is not None: + return shop_container + + errors.append(f"{marker} found, but shop container is missing") + + raise ValueError("; ".join(errors) or "Shop payload not found in page") + + +def html_fragment_to_lines(fragment: str) -> list[str]: + if not fragment: + return [] + + soup = BeautifulSoup(fragment, "html.parser") + return [ + normalize_spaces(line) + for line in soup.get_text("\n", strip=True).splitlines() + if normalize_spaces(line) + ] + + +class GorichMenuScraper: + def __init__(self) -> None: + self.site_url = settings.site_url + self.output_path = Path(settings.output_path) + self.timeout = settings.request_timeout + + async def fetch_html(self) -> str: + async with self._build_client() as client: + response = await client.get(self.site_url) + response.raise_for_status() + return response.text + + def _build_client(self) -> httpx.AsyncClient: + return httpx.AsyncClient( + headers={"User-Agent": "Mozilla/5.0"}, + follow_redirects=True, + timeout=self.timeout, + ) + + def parse_menu(self, html: str) -> MenuSnapshot: + payload = extract_shop_payload(html) + shop = payload.get("shop") or {} + if not isinstance(shop, dict): + raise ValueError("Shop payload has unexpected format") + + shop_settings = shop.get("settings") or {} + categories = shop_settings.get("categories") or [] + products = shop.get("products") or [] + if not isinstance(categories, list) or not isinstance(products, list): + raise ValueError("Shop categories or products have unexpected format") + + category_by_id: dict[int, dict[str, object]] = {} + for category in categories: + if not isinstance(category, dict): + continue + category_id = category.get("id") + if isinstance(category_id, int): + category_by_id[category_id] = category + + scraped_at = datetime.now(timezone.utc) + items: list[MenuItem] = [] + + for product in products: + if not isinstance(product, dict): + continue + if not product.get("is_visible", True): + continue + + product_id = product.get("id") + name = normalize_spaces(str(product.get("name", ""))) + if not product_id or not name: + continue + + raw_description = str(product.get("short_description", "") or "") + description_lines = html_fragment_to_lines(raw_description) + size = extract_size(name, *description_lines) + description_parts = [line for line in description_lines if not is_size_only_line(line)] + description = " ".join(description_parts).strip() + if not description and description_lines: + description = " ".join(description_lines).strip() + + raw_category_ids = [ + category_id + for category_id in product.get("category_list", []) + if isinstance(category_id, int) + ] + sorted_category_ids = sorted( + raw_category_ids, + key=lambda category_id: int(category_by_id.get(category_id, {}).get("pos", 10_000)), + ) + category_name = "прочее" + primary_category_id: int | None = None + if sorted_category_ids: + primary_category_id = sorted_category_ids[0] + category_name = normalize_spaces( + str(category_by_id.get(primary_category_id, {}).get("name", "прочее")) + ).lower() + + image_url = "" + image_list = product.get("image_list", []) + if isinstance(image_list, list): + for image in image_list: + if not isinstance(image, dict): + continue + raw_url = str(image.get("url", "") or "") + if raw_url: + image_url = urljoin(self.site_url, raw_url) + break + + price = product.get("price") + numeric_price = int(price) if isinstance(price, int) else None + currency = normalize_spaces(str(product.get("currency", "руб.") or "руб.")) + price_label = ( + f"{numeric_price} {currency}" if numeric_price is not None else "Цена не указана" + ) + + description_url = str(product.get("description_url", "") or "") + source_url = urljoin(self.site_url, description_url) if description_url else self.site_url + + items.append( + MenuItem( + item_id=str(product_id), + name=name, + category=category_name, + description=description, + ingredients=parse_ingredients(description), + price=parse_price(price_label), + price_label=price_label, + size=size, + photo_url=image_url, + source_url=source_url, + scraped_at=scraped_at, + metadata={ + "category_id": primary_category_id, + "category_ids": sorted_category_ids, + "raw_short_description": raw_description, + "amount": product.get("amount"), + "sku": product.get("sku"), + }, + ) + ) + + return MenuSnapshot( + source_url=self.site_url, + scraped_at=scraped_at, + total_items=len(items), + items=items, + ) + + def save_snapshot(self, snapshot: MenuSnapshot) -> None: + self.output_path.parent.mkdir(parents=True, exist_ok=True) + self.output_path.write_text( + json.dumps(snapshot.model_dump(mode="json"), ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + async def scrape_and_save(self) -> MenuSnapshot: + html = await self.fetch_html() + snapshot = self.parse_menu(html) + self.save_snapshot(snapshot) + return snapshot diff --git a/menu_scraper/requirements.txt b/menu_scraper/requirements.txt new file mode 100644 index 0000000..0062891 --- /dev/null +++ b/menu_scraper/requirements.txt @@ -0,0 +1,6 @@ +beautifulsoup4==4.12.3 +fastapi==0.115.12 +httpx==0.28.1 +pydantic==2.11.4 +uvicorn==0.34.2 + diff --git a/postgres.env.example b/postgres.env.example new file mode 100644 index 0000000..a93160f --- /dev/null +++ b/postgres.env.example @@ -0,0 +1,5 @@ +POSTGRES_DB=gorych_bot_db +POSTGRES_USER='' +POSTGRES_PASSWORD='' +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 \ No newline at end of file diff --git a/rag_api/.env.example b/rag_api/.env.example new file mode 100644 index 0000000..171cf6e --- /dev/null +++ b/rag_api/.env.example @@ -0,0 +1,30 @@ +GORICH_SITE_URL=https://gorych34.ru/ + +# ChromaDB +CHROMA_PATH=/data/chroma +HUGGINGFACE_CACHE_DIR=/data/huggingface +KNOWLEDGE_COLLECTION=gorich_knowledge +MENU_COLLECTION=gorich_menu +MENU_SNAPSHOT_PATH=/data/menu/gorich_menu.json +ANONYMIZED_TELEMETRY=false + +# OpenRouter +OPENROUTER_API_KEY=your_openrouter_api_key +OPENROUTER_MODEL=mistralai/mistral-medium-3-5 +OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 + +# Public app metadata +PUBLIC_APP_URL=http://localhost:8001 +PUBLIC_APP_NAME=Gorich Bot RAG + +# Embeddings +EMBEDDING_MODEL=sergeyzh/rubert-mini-frida +EMBEDDING_QUERY_PREFIX="search_query: " +EMBEDDING_DOCUMENT_PREFIX="search_document: " +EMBEDDING_MAX_LENGTH=512 +EMBEDDING_BATCH_SIZE=32 + +# RAG +REQUEST_TIMEOUT_SECONDS=60 +RAG_TOP_K=5 +INDEX_ON_STARTUP=true diff --git a/rag_api/Dockerfile b/rag_api/Dockerfile new file mode 100644 index 0000000..c109fd8 --- /dev/null +++ b/rag_api/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY rag_api/requirements.txt /app/requirements.txt + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==2.7.0 && \ + pip install --no-cache-dir -r requirements.txt + +COPY rag_api/app /app/app + +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/rag_api/app/__init__.py b/rag_api/app/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/rag_api/app/__init__.py @@ -0,0 +1 @@ + diff --git a/rag_api/app/config.py b/rag_api/app/config.py new file mode 100644 index 0000000..bb851d1 --- /dev/null +++ b/rag_api/app/config.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import os +from dataclasses import dataclass + + +@dataclass(slots=True) +class Settings: + app_name: str = "Gorich RAG API" + site_url: str = os.getenv("GORICH_SITE_URL", "https://gorych34.ru/") + chroma_path: str = os.getenv("CHROMA_PATH", "/data/chroma") + huggingface_cache_dir: str = os.getenv("HUGGINGFACE_CACHE_DIR", "/data/huggingface") + knowledge_collection: str = os.getenv("KNOWLEDGE_COLLECTION", "gorich_knowledge") + menu_collection: str = os.getenv("MENU_COLLECTION", "gorich_menu") + menu_snapshot_path: str = os.getenv("MENU_SNAPSHOT_PATH", "/data/menu/gorich_menu.json") + openrouter_api_key: str = os.getenv("OPENROUTER_API_KEY", "") + openrouter_model: str = os.getenv("OPENROUTER_MODEL", "mistralai/mistral-medium-3-5") + openrouter_base_url: str = os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") + public_app_url: str = os.getenv("PUBLIC_APP_URL", "http://localhost:8000") + public_app_name: str = os.getenv("PUBLIC_APP_NAME", "Gorich Bot RAG") + embedding_model: str = os.getenv( + "EMBEDDING_MODEL", + "sergeyzh/rubert-mini-frida", + ) + embedding_query_prefix: str = os.getenv("EMBEDDING_QUERY_PREFIX", "search_query: ") + embedding_document_prefix: str = os.getenv( + "EMBEDDING_DOCUMENT_PREFIX", + "search_document: ", + ) + embedding_max_length: int = int(os.getenv("EMBEDDING_MAX_LENGTH", "512")) + embedding_batch_size: int = int(os.getenv("EMBEDDING_BATCH_SIZE", "32")) + request_timeout: float = float(os.getenv("REQUEST_TIMEOUT_SECONDS", "60")) + top_k: int = int(os.getenv("RAG_TOP_K", "5")) + index_on_startup: bool = os.getenv("INDEX_ON_STARTUP", "true").lower() == "true" + + +settings = Settings() diff --git a/rag_api/app/embeddings.py b/rag_api/app/embeddings.py new file mode 100644 index 0000000..5475ef0 --- /dev/null +++ b/rag_api/app/embeddings.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Iterable + +import torch +import torch.nn.functional as functional +from transformers import AutoModel, AutoTokenizer + +from .config import settings + + +class RuBertMiniFridaEmbedder: + def __init__(self) -> None: + torch.set_grad_enabled(False) + self.device = "cpu" + self.max_length = settings.embedding_max_length + self.batch_size = settings.embedding_batch_size + self.cache_dir = Path(settings.huggingface_cache_dir) + self.cache_dir.mkdir(parents=True, exist_ok=True) + self.tokenizer = AutoTokenizer.from_pretrained( + settings.embedding_model, + cache_dir=str(self.cache_dir), + ) + self.model = AutoModel.from_pretrained( + settings.embedding_model, + cache_dir=str(self.cache_dir), + ) + self.model.to(self.device) + self.model.eval() + + @staticmethod + def mean_pool(hidden_state: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor: + masked_state = hidden_state * attention_mask.unsqueeze(-1).float() + summed = torch.sum(masked_state, dim=1) + counts = attention_mask.sum(dim=1, keepdim=True).float() + return summed / counts + + def _encode(self, texts: Iterable[str], prompt: str) -> list[list[float]]: + prepared_texts = [f"{prompt}{text}" for text in texts] + if not prepared_texts: + return [] + + embeddings: list[list[float]] = [] + for start in range(0, len(prepared_texts), self.batch_size): + batch = prepared_texts[start : start + self.batch_size] + encoded = self.tokenizer( + batch, + max_length=self.max_length, + padding=True, + truncation=True, + return_tensors="pt", + ) + encoded = {key: value.to(self.device) for key, value in encoded.items()} + outputs = self.model(**encoded) + pooled = self.mean_pool(outputs.last_hidden_state, encoded["attention_mask"]) + normalized = functional.normalize(pooled, p=2, dim=1) + embeddings.extend(normalized.cpu().tolist()) + return embeddings + + def embed_documents(self, texts: Iterable[str]) -> list[list[float]]: + return self._encode(texts, prompt=settings.embedding_document_prefix) + + def embed_queries(self, texts: Iterable[str]) -> list[list[float]]: + return self._encode(texts, prompt=settings.embedding_query_prefix) diff --git a/rag_api/app/main.py b/rag_api/app/main.py new file mode 100644 index 0000000..cfec2e2 --- /dev/null +++ b/rag_api/app/main.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +from contextlib import asynccontextmanager + +from fastapi import FastAPI + +from .config import settings +from .menu_catalog import MenuCatalog +from .models import ChatRequest, ChatResponse, IndexResponse +from .service import RagService + + +rag_service = RagService() +menu_catalog = MenuCatalog() + + +@asynccontextmanager +async def lifespan(_: FastAPI): + if settings.index_on_startup: + await rag_service.reindex() + yield + + +app = FastAPI(title=settings.app_name, version="1.0.0", lifespan=lifespan) + + +@app.get("/health") +async def health() -> dict[str, str]: + return {"status": "ok"} + + +@app.post("/chat", response_model=ChatResponse) +async def chat(request: ChatRequest) -> ChatResponse: + return await rag_service.chat(request) + + +@app.post("/admin/reindex", response_model=IndexResponse) +async def reindex() -> IndexResponse: + return await rag_service.reindex() + + +@app.get("/menu/search") +async def search_menu( + query: str = "", + max_price: int | None = None, + category: str | None = None, + must_include: str | None = None, + must_not_include: str | None = None, + limit: int = 5, +) -> dict[str, object]: + return { + "items": rag_service.search_menu( + query=query, + max_price=max_price, + category=category, + must_include=[value.strip() for value in must_include.split(",")] + if must_include + else None, + must_not_include=[value.strip() for value in must_not_include.split(",")] + if must_not_include + else None, + limit=limit, + ) + } diff --git a/rag_api/app/menu_catalog.py b/rag_api/app/menu_catalog.py new file mode 100644 index 0000000..2dfe630 --- /dev/null +++ b/rag_api/app/menu_catalog.py @@ -0,0 +1,214 @@ +from __future__ import annotations + +import json +import re +from pathlib import Path + +from .config import settings +from .models import MenuItem, MenuSnapshot + + +def tokenize(value: str) -> list[str]: + raw_tokens = re.findall(r"[a-zA-Zа-яА-Я0-9]+", value.lower()) + return [ + token + for token in raw_tokens + if token not in QUERY_STOPWORDS and (len(token) > 2 or token.isdigit()) + ] + + +QUERY_STOPWORDS = { + "что", + "у", + "вас", + "есть", + "из", + "как", + "ли", + "мне", + "могу", + "хочу", + "надо", + "для", + "под", + "про", + "или", + "это", + "эта", + "этот", + "какой", + "какая", + "какие", + "посоветуй", + "посоветуйте", + "подбери", + "подобрать", + "вкусную", + "вкусный", + "вкусное", +} + + +QUERY_HINTS = { + "шаурма": ["шаурма", "классика"], + "шаурмы": ["шаурма", "классика"], + "шаверма": ["шаурма", "классика"], + "шавуха": ["шаурма", "классика"], + "острый": ["халапеньо", "шрирача", "том", "ям"], + "острая": ["халапеньо", "шрирача", "том", "ям"], + "острое": ["халапеньо", "шрирача", "том", "ям"], + "острого": ["халапеньо", "шрирача", "том", "ям"], + "пикантный": ["халапеньо", "шрирача", "том", "ям"], + "сыр": ["сыр", "моцарелла", "пармезан", "крем", "чиз"], + "сыром": ["сыр", "моцарелла", "пармезан", "крем", "чиз"], + "сыра": ["сыр", "моцарелла", "пармезан", "крем", "чиз"], + "сырный": ["сыр", "моцарелла", "пармезан", "крем", "чиз"], + "сырная": ["сыр", "моцарелла", "пармезан", "крем", "чиз"], + "рыбный": ["лосось"], + "рыбная": ["лосось"], + "мясной": ["свинина", "курица", "ростбиф", "колбаски", "пепперони"], + "мясная": ["свинина", "курица", "ростбиф", "колбаски", "пепперони"], +} + +CATEGORY_ALIASES = { + "шаурмы": "шаурма", + "шаверма": "шаурма", + "шавуха": "шаурма", +} + + +class MenuCatalog: + def __init__(self) -> None: + self.snapshot_path = Path(settings.menu_snapshot_path) + + def exists(self) -> bool: + return self.snapshot_path.exists() + + def load_snapshot(self) -> MenuSnapshot: + data = json.loads(self.snapshot_path.read_text(encoding="utf-8")) + return MenuSnapshot.model_validate(data) + + def menu_documents(self) -> list[tuple[MenuItem, str]]: + if not self.exists(): + return [] + + snapshot = self.load_snapshot() + documents: list[tuple[MenuItem, str]] = [] + for item in snapshot.items: + text = " | ".join( + [ + item.name, + item.category, + item.description, + ", ".join(item.ingredients), + item.size or "", + item.price_label, + ] + ) + documents.append((item, text)) + return documents + + def items_map(self) -> dict[str, MenuItem]: + if not self.exists(): + return {} + + snapshot = self.load_snapshot() + return {item.item_id: item for item in snapshot.items} + + def search( + self, + query: str = "", + max_price: int | None = None, + category: str | None = None, + must_include: list[str] | None = None, + must_not_include: list[str] | None = None, + limit: int = 5, + candidate_ids: list[str] | None = None, + semantic_ranks: dict[str, int] | None = None, + ) -> list[dict[str, object]]: + if not self.exists(): + return [] + + must_include = [value.lower() for value in (must_include or [])] + must_not_include = [value.lower() for value in (must_not_include or [])] + query_tokens = tokenize(query) + normalized_category = category.lower() if category else None + if normalized_category in CATEGORY_ALIASES: + normalized_category = CATEGORY_ALIASES[normalized_category] + hint_tokens = [] + for token in query_tokens: + hint_tokens.extend(QUERY_HINTS.get(token, [])) + candidate_set = set(candidate_ids or []) + semantic_ranks = semantic_ranks or {} + + scored_items: list[tuple[int, MenuItem]] = [] + for item, text in self.menu_documents(): + if candidate_set and item.item_id not in candidate_set: + continue + + lowered = text.lower() + + if normalized_category and item.category.lower() != normalized_category: + continue + if max_price is not None and item.price is not None and item.price > max_price: + continue + if max_price is not None and item.price is None: + continue + if any(value not in lowered for value in must_include): + continue + if any(value in lowered for value in must_not_include): + continue + + score = 0 + for token in query_tokens: + if token in lowered: + score += 3 + if token in item.name.lower(): + score += 5 + + for token in hint_tokens: + if token in lowered: + score += 6 + if token == item.category.lower(): + score += 8 + + for token in must_include: + if token in lowered: + score += 4 + + if item.item_id in semantic_ranks: + score += max(0, 20 - semantic_ranks[item.item_id]) + + if not query_tokens and not must_include and category: + score += 1 + + scored_items.append((score, item)) + + scored_items.sort( + key=lambda row: ( + row[0], + -(row[1].price or 0), + row[1].name, + ), + reverse=True, + ) + + results: list[dict[str, object]] = [] + for score, item in scored_items[:limit]: + results.append( + { + "item_id": item.item_id, + "name": item.name, + "category": item.category, + "description": item.description, + "ingredients": item.ingredients, + "price": item.price, + "price_label": item.price_label, + "size": item.size, + "photo_url": item.photo_url, + "source_url": item.source_url, + "score": score, + } + ) + + return results diff --git a/rag_api/app/models.py b/rag_api/app/models.py new file mode 100644 index 0000000..86f855f --- /dev/null +++ b/rag_api/app/models.py @@ -0,0 +1,72 @@ +from __future__ import annotations + +from datetime import datetime +from typing import Any + +from pydantic import BaseModel, Field + + +class ChatMessage(BaseModel): + role: str + content: str + + +class ChatRequest(BaseModel): + message: str + history: list[ChatMessage] = Field(default_factory=list) + + +class SourceDocument(BaseModel): + source_id: str + source_type: str + title: str + source_url: str + snippet: str + published_at: datetime | None = None + score: float | None = None + + +class ChatResponse(BaseModel): + answer: str + model: str + sources: list[SourceDocument] + tool_results: list[dict[str, Any]] = Field(default_factory=list) + + +class IndexResponse(BaseModel): + indexed_knowledge_documents: int + indexed_menu_documents: int + menu_items_loaded: int + + +class KnowledgeDocument(BaseModel): + doc_id: str + title: str + text: str + source_type: str + source_url: str + published_at: datetime | None = None + metadata: dict[str, Any] = Field(default_factory=dict) + + +class MenuItem(BaseModel): + item_id: str + name: str + category: str + description: str + ingredients: list[str] + price: int | None = None + price_label: str + size: str | None = None + photo_url: str + source_url: str + scraped_at: datetime + metadata: dict[str, Any] = Field(default_factory=dict) + + +class MenuSnapshot(BaseModel): + source_url: str + scraped_at: datetime + total_items: int + items: list[MenuItem] + diff --git a/rag_api/app/noop_telemetry.py b/rag_api/app/noop_telemetry.py new file mode 100644 index 0000000..75d6db7 --- /dev/null +++ b/rag_api/app/noop_telemetry.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from chromadb.telemetry.product import ProductTelemetryClient, ProductTelemetryEvent +from overrides import override + + +class NoOpProductTelemetry(ProductTelemetryClient): + @override + def capture(self, event: ProductTelemetryEvent) -> None: + return None diff --git a/rag_api/app/openrouter_client.py b/rag_api/app/openrouter_client.py new file mode 100644 index 0000000..4fbaba6 --- /dev/null +++ b/rag_api/app/openrouter_client.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from typing import Any + +import httpx + +from .config import settings + + +class OpenRouterClient: + def __init__(self) -> None: + self.base_url = settings.openrouter_base_url.rstrip("/") + self.api_key = settings.openrouter_api_key + self.model = settings.openrouter_model + self.timeout = settings.request_timeout + + async def chat_completion( + self, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + temperature: float = 0.2, + ) -> dict[str, Any]: + if not self.api_key: + raise RuntimeError("OPENROUTER_API_KEY is not configured") + + payload: dict[str, Any] = { + "model": self.model, + "messages": messages, + "temperature": temperature, + } + if tools: + payload["tools"] = tools + payload["tool_choice"] = tool_choice or "auto" + + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + "HTTP-Referer": settings.public_app_url, + "X-Title": settings.public_app_name, + } + + async with httpx.AsyncClient(timeout=self.timeout) as client: + response = await client.post( + f"{self.base_url}/chat/completions", + headers=headers, + json=payload, + ) + response.raise_for_status() + return response.json() + diff --git a/rag_api/app/service.py b/rag_api/app/service.py new file mode 100644 index 0000000..7c6e68b --- /dev/null +++ b/rag_api/app/service.py @@ -0,0 +1,306 @@ +from __future__ import annotations + +import json +from typing import Any + +from .config import settings +from .embeddings import RuBertMiniFridaEmbedder +from .menu_catalog import MenuCatalog +from .models import ChatRequest, ChatResponse, IndexResponse, KnowledgeDocument, SourceDocument +from .openrouter_client import OpenRouterClient +from .site_scraper import SiteKnowledgeScraper +from .vector_store import VectorStore + + +class RagService: + def __init__(self) -> None: + self.vector_store = VectorStore() + self.embedder = RuBertMiniFridaEmbedder() + self.site_scraper = SiteKnowledgeScraper() + self.menu_catalog = MenuCatalog() + self.openrouter = OpenRouterClient() + self.knowledge_collection = self.vector_store.get_collection( + settings.knowledge_collection + ) + self.menu_collection = self.vector_store.get_collection(settings.menu_collection) + + @staticmethod + def clear_collection(collection: Any) -> None: + ids = collection.get(include=[])["ids"] + if ids: + collection.delete(ids=ids) + + async def reindex(self) -> IndexResponse: + knowledge_documents = await self.site_scraper.scrape() + self.clear_collection(self.knowledge_collection) + self.clear_collection(self.menu_collection) + + if knowledge_documents: + knowledge_texts = [doc.text for doc in knowledge_documents] + self.knowledge_collection.add( + ids=[doc.doc_id for doc in knowledge_documents], + documents=knowledge_texts, + embeddings=self.embedder.embed_documents(knowledge_texts), + metadatas=[ + { + "title": doc.title, + "source_type": doc.source_type, + "source_url": doc.source_url, + "published_at": doc.published_at.isoformat() + if doc.published_at + else "", + **doc.metadata, + } + for doc in knowledge_documents + ], + ) + + menu_documents = self.menu_catalog.menu_documents() + if menu_documents: + menu_texts = [document for _, document in menu_documents] + self.menu_collection.add( + ids=[item.item_id for item, _ in menu_documents], + documents=menu_texts, + embeddings=self.embedder.embed_documents(menu_texts), + metadatas=[ + { + "name": item.name, + "category": item.category, + "price": item.price if item.price is not None else -1, + "price_label": item.price_label, + "source_url": item.source_url, + "photo_url": item.photo_url, + } + for item, _ in menu_documents + ], + ) + + return IndexResponse( + indexed_knowledge_documents=len(knowledge_documents), + indexed_menu_documents=len(menu_documents), + menu_items_loaded=len(menu_documents), + ) + + def retrieve_knowledge(self, query: str) -> list[SourceDocument]: + if self.knowledge_collection.count() == 0: + return [] + + query_embedding = self.embedder.embed_queries([query])[0] + result = self.knowledge_collection.query( + query_embeddings=[query_embedding], + n_results=settings.top_k, + ) + documents = result.get("documents", [[]])[0] + metadatas = result.get("metadatas", [[]])[0] + distances = result.get("distances", [[]])[0] + ids = result.get("ids", [[]])[0] + + sources: list[SourceDocument] = [] + for index, document in enumerate(documents): + metadata = metadatas[index] + published_at = metadata.get("published_at") or None + sources.append( + SourceDocument( + source_id=ids[index], + source_type=str(metadata.get("source_type", "unknown")), + title=str(metadata.get("title", ids[index])), + source_url=str(metadata.get("source_url", settings.site_url)), + snippet=document[:400], + published_at=published_at, + score=distances[index] if index < len(distances) else None, + ) + ) + return sources + + def build_system_prompt(self, sources: list[SourceDocument]) -> str: + context_parts = [] + for source in sources: + published_label = ( + f" | дата: {source.published_at.isoformat()}" + if source.published_at + else "" + ) + context_parts.append( + f"[{source.source_type}] {source.title}{published_label}\n" + f"Источник: {source.source_url}\n" + f"{source.snippet}" + ) + + context_block = "\n\n".join(context_parts) if context_parts else "Нет найденного контекста." + return ( + "Ты помощник шаурмечной Горыч из Волгограда.\n" + "Отвечай по-русски, дружелюбно, естественно и клиентоориентированно.\n" + "Не начинай каждый ответ с нового приветствия.\n" + "Отвечай только на текущий вопрос пользователя и не повторяй без необходимости уже сказанное ранее.\n" + "Не перечисляй ассортимент без запроса. Если человек не просил список позиций, не превращай ответ в каталог.\n" + "Для рекомендаций предлагай максимум 3 конкретные позиции.\n" + "Не выдумывай факты. Если данные расходятся, прямо скажи об этом и укажи источник.\n" + "Если вопрос про режим работы, доставку, контакты, адрес, соцсети, способы заказа или общую информацию о заведении, отвечай по контексту и не используй tool меню.\n" + "Используй tool find_menu_items только когда пользователь явно просит подобрать, перечислить, сравнить или найти блюда из меню:\n" + "- что есть в меню;\n" + "- что посоветуете из конкретной категории;\n" + "- что есть с определённым ингредиентом;\n" + "- что можно до определённого бюджета;\n" + "- что острое, сырное, мясное и так далее, если нужен именно подбор позиций.\n" + "Если вопрос общий и консультативный, например про вкус, выбор мяса или что лучше взять в целом, сначала ответь по-человечески и не вызывай tool, пока пользователь не попросит конкретные позиции.\n" + "Если всё же используешь tool и он вернул позиции, назови их по именам и по возможности укажи цену.\n" + "Если tool ничего не нашёл, честно скажи об этом и предложи уточнить запрос.\n" + "Если в контексте есть даты, ориентируйся на более свежие данные.\n\n" + "Формат ответа:\n" + "- Используй только HTML-теги, подходящие для Telegram/aiogram: , , , , ,
, .\n"
+            "- Не используй Markdown со звёздочками, подчёркиваниями или решётками.\n"
+            "- Не пиши служебные фразы вроде 'выберите вопрос ниже'.\n\n"
+            f"Контекст RAG:\n{context_block}"
+        )
+
+    def build_tools(self) -> list[dict[str, Any]]:
+        return [
+            {
+                "type": "function",
+                "function": {
+                    "name": "find_menu_items",
+                    "description": "Подбирает блюда из меню Горыча по описанию, бюджету, категории и ингредиентам. Использовать только для явных запросов о меню и конкретных позициях, не использовать для вопросов о режиме работы, доставке, контактах и общей информации о заведении.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "query": {
+                                "type": "string",
+                                "description": "Свободное описание того, что хочет пользователь.",
+                            },
+                            "max_price": {
+                                "type": "integer",
+                                "description": "Максимальная цена в рублях.",
+                            },
+                            "category": {
+                                "type": "string",
+                                "description": "Категория блюда, например: пицца, донар, шаурма.",
+                            },
+                            "must_include": {
+                                "type": "array",
+                                "items": {"type": "string"},
+                                "description": "Ингредиенты или слова, которые желательно включить.",
+                            },
+                            "must_not_include": {
+                                "type": "array",
+                                "items": {"type": "string"},
+                                "description": "Ингредиенты или слова, которых нужно избегать.",
+                            },
+                            "limit": {
+                                "type": "integer",
+                                "description": "Максимум позиций в выдаче.",
+                                "default": 5,
+                            },
+                        },
+                        "required": [],
+                    },
+                },
+            }
+        ]
+
+    def run_tool(self, name: str, arguments: dict[str, Any]) -> list[dict[str, Any]]:
+        if name != "find_menu_items":
+            return []
+
+        return self.search_menu(
+            query=arguments.get("query", ""),
+            max_price=arguments.get("max_price"),
+            category=arguments.get("category"),
+            must_include=arguments.get("must_include"),
+            must_not_include=arguments.get("must_not_include"),
+            limit=arguments.get("limit", 5),
+        )
+
+    def search_menu(
+        self,
+        query: str = "",
+        max_price: int | None = None,
+        category: str | None = None,
+        must_include: list[str] | None = None,
+        must_not_include: list[str] | None = None,
+        limit: int = 5,
+    ) -> list[dict[str, Any]]:
+        candidate_ids: list[str] | None = None
+        semantic_ranks: dict[str, int] | None = None
+
+        if query and self.menu_collection.count() > 0:
+            query_embedding = self.embedder.embed_queries([query])[0]
+            semantic_result = self.menu_collection.query(
+                query_embeddings=[query_embedding],
+                n_results=min(max(limit * 4, 10), self.menu_collection.count()),
+            )
+            candidate_ids = semantic_result.get("ids", [[]])[0]
+            semantic_ranks = {
+                item_id: rank for rank, item_id in enumerate(candidate_ids, start=1)
+            }
+
+        return self.menu_catalog.search(
+            query=query,
+            max_price=max_price,
+            category=category,
+            must_include=must_include,
+            must_not_include=must_not_include,
+            limit=limit,
+            candidate_ids=candidate_ids,
+            semantic_ranks=semantic_ranks,
+        )
+
+    async def chat(self, request: ChatRequest) -> ChatResponse:
+        sources = self.retrieve_knowledge(request.message)
+        messages: list[dict[str, Any]] = [
+            {"role": "system", "content": self.build_system_prompt(sources)}
+        ]
+        for message in request.history:
+            messages.append({"role": message.role, "content": message.content})
+        messages.append({"role": "user", "content": request.message})
+
+        tools = self.build_tools()
+        first_response = await self.openrouter.chat_completion(
+            messages=messages,
+            tools=tools,
+        )
+        choice_message = first_response["choices"][0]["message"]
+        tool_calls = choice_message.get("tool_calls", [])
+        tool_results: list[dict[str, Any]] = []
+        model = first_response.get("model", settings.openrouter_model)
+
+        if tool_calls:
+            messages.append(
+                {
+                    "role": "assistant",
+                    "content": choice_message.get("content", ""),
+                    "tool_calls": tool_calls,
+                }
+            )
+
+            for tool_call in tool_calls:
+                function_name = tool_call["function"]["name"]
+                arguments = json.loads(tool_call["function"]["arguments"] or "{}")
+                result = self.run_tool(function_name, arguments)
+                tool_results.extend(result)
+                messages.append(
+                    {
+                        "role": "tool",
+                        "tool_call_id": tool_call["id"],
+                        "name": function_name,
+                        "content": json.dumps(result, ensure_ascii=False),
+                    }
+                )
+
+            final_response = await self.openrouter.chat_completion(messages=messages)
+            final_message = final_response["choices"][0]["message"]["content"]
+            model = final_response.get("model", settings.openrouter_model)
+
+            return ChatResponse(
+                answer=final_message,
+                model=model,
+                sources=sources,
+                    tool_results=tool_results,
+            )
+
+        answer = choice_message.get("content", "")
+        return ChatResponse(
+            answer=answer,
+            model=model,
+            sources=sources,
+            tool_results=tool_results,
+        )
diff --git a/rag_api/app/site_scraper.py b/rag_api/app/site_scraper.py
new file mode 100644
index 0000000..d8eb9b3
--- /dev/null
+++ b/rag_api/app/site_scraper.py
@@ -0,0 +1,226 @@
+from __future__ import annotations
+
+import re
+from datetime import datetime, timezone
+
+import httpx
+from bs4 import BeautifulSoup
+
+from .config import settings
+from .models import KnowledgeDocument
+
+
+MAP_PATTERN = re.compile(
+    r"yandexMaps\.addMap\('([^']+)'\s*,\s*'([^']+)'\s*,\s*'([^']+)'\)"
+)
+
+
+def normalize_spaces(value: str) -> str:
+    return " ".join(value.replace("\xa0", " ").split())
+
+
+def deduplicate_preserving_order(values: list[str]) -> list[str]:
+    seen: set[str] = set()
+    result: list[str] = []
+    for value in values:
+        if value and value not in seen:
+            seen.add(value)
+            result.append(value)
+    return result
+
+
+def is_meaningful_value(value: str) -> bool:
+    return any(char.isalnum() for char in value)
+
+
+class SiteKnowledgeScraper:
+    ABOUT_MARKER = "ТЕРРИТОРИЯ БЫСТРОГО ПИТАНИЯ В ВОЛГОГРАДЕ"
+    MENU_MARKER = "МЕНЮ"
+    DELIVERY_MARKER = "ДОСТАВКА"
+    CONTACT_MARKER = "КОНТАКТЫ"
+    CONTACT_END_MARKERS = ("Закрыть", "OK")
+
+    def __init__(self) -> None:
+        self.site_url = settings.site_url
+        self.timeout = settings.request_timeout
+
+    async def fetch_homepage(self) -> str:
+        async with httpx.AsyncClient(
+            headers={"User-Agent": "Mozilla/5.0"},
+            follow_redirects=True,
+            timeout=self.timeout,
+        ) as client:
+            response = await client.get(self.site_url)
+            response.raise_for_status()
+            return response.text
+
+    def visible_strings(self, soup: BeautifulSoup) -> list[str]:
+        return [
+            normalized
+            for text in soup.stripped_strings
+            for normalized in [normalize_spaces(text)]
+            if normalized and is_meaningful_value(normalized)
+        ]
+
+    def find_marker(self, values: list[str], marker: str, start: int = 0) -> int | None:
+        for index in range(start, len(values)):
+            if values[index] == marker:
+                return index
+        return None
+
+    def find_last_marker(self, values: list[str], marker: str, start: int = 0) -> int | None:
+        for index in range(len(values) - 1, start - 1, -1):
+            if values[index] == marker:
+                return index
+        return None
+
+    def slice_between_markers(
+        self,
+        values: list[str],
+        start_marker: str,
+        end_markers: tuple[str, ...],
+        start_at: int = 0,
+    ) -> list[str]:
+        start_index = self.find_marker(values, start_marker, start_at)
+        if start_index is None:
+            return []
+
+        end_index = len(values)
+        for marker in end_markers:
+            marker_index = self.find_marker(values, marker, start_index + 1)
+            if marker_index is not None:
+                end_index = min(end_index, marker_index)
+
+        return values[start_index:end_index]
+
+    def extract_social_links(self, soup: BeautifulSoup) -> list[str]:
+        links: list[str] = []
+        for node in soup.select("[data-page-link]"):
+            href = node.get("data-page-link")
+            label = normalize_spaces(node.get_text(" ", strip=True))
+            if not href:
+                continue
+            if label:
+                links.append(f"{label}: {href}")
+            else:
+                links.append(str(href))
+        return deduplicate_preserving_order(links)
+
+    def extract_map_coordinates(self, html: str) -> str | None:
+        match = MAP_PATTERN.search(html)
+        if not match:
+            return None
+        latitude = normalize_spaces(match.group(2))
+        longitude = normalize_spaces(match.group(3))
+        return f"{latitude}, {longitude}"
+
+    def parse_homepage(self, html: str) -> list[KnowledgeDocument]:
+        soup = BeautifulSoup(html, "html.parser")
+        strings = self.visible_strings(soup)
+        documents: list[KnowledgeDocument] = []
+        scraped_at = datetime.now(timezone.utc)
+
+        meta_description = soup.select_one('meta[name="description"]')
+        if meta_description and meta_description.get("content"):
+            documents.append(
+                KnowledgeDocument(
+                    doc_id="site-meta-description",
+                    title="Краткое описание заведения",
+                    text=normalize_spaces(meta_description["content"]),
+                    source_type="about",
+                    source_url=self.site_url,
+                    metadata={"scraped_at": scraped_at.isoformat()},
+                )
+            )
+
+        about_section = self.slice_between_markers(
+            strings,
+            self.ABOUT_MARKER,
+            (self.MENU_MARKER,),
+        )
+        if about_section:
+            documents.append(
+                KnowledgeDocument(
+                    doc_id="site-about",
+                    title=about_section[0],
+                    text="\n".join(deduplicate_preserving_order(about_section[1:])),
+                    source_type="about",
+                    source_url=self.site_url,
+                    metadata={"scraped_at": scraped_at.isoformat()},
+                )
+            )
+
+        social_links = self.extract_social_links(soup)
+        if social_links:
+            documents.append(
+                KnowledgeDocument(
+                    doc_id="site-links",
+                    title="Соцсети и внешние площадки",
+                    text="\n".join(social_links),
+                    source_type="links",
+                    source_url=self.site_url,
+                    metadata={"scraped_at": scraped_at.isoformat()},
+                )
+            )
+
+        menu_index = self.find_marker(strings, self.MENU_MARKER)
+        delivery_start = self.find_last_marker(
+            strings,
+            self.DELIVERY_MARKER,
+            start=(menu_index + 1) if menu_index is not None else 0,
+        )
+        contact_start = self.find_last_marker(
+            strings,
+            self.CONTACT_MARKER,
+            start=(delivery_start + 1) if delivery_start is not None else 0,
+        )
+        delivery_section = (
+            strings[delivery_start:contact_start]
+            if delivery_start is not None and contact_start is not None and contact_start > delivery_start
+            else []
+        )
+        if delivery_section:
+            documents.append(
+                KnowledgeDocument(
+                    doc_id="site-delivery",
+                    title=delivery_section[0],
+                    text="\n".join(deduplicate_preserving_order(delivery_section[1:])),
+                    source_type="delivery",
+                    source_url=self.site_url,
+                    metadata={"scraped_at": scraped_at.isoformat()},
+                )
+            )
+
+        auth_index = len(strings)
+        if contact_start is not None:
+            for marker in self.CONTACT_END_MARKERS:
+                marker_index = self.find_marker(strings, marker, contact_start + 1)
+                if marker_index is not None:
+                    auth_index = min(auth_index, marker_index)
+        contact_section = (
+            strings[contact_start:auth_index]
+            if contact_start is not None and auth_index > contact_start
+            else []
+        )
+        if contact_section:
+            metadata = {"scraped_at": scraped_at.isoformat()}
+            coordinates = self.extract_map_coordinates(html)
+            if coordinates:
+                metadata["map_coordinates"] = coordinates
+
+            documents.append(
+                KnowledgeDocument(
+                    doc_id="site-contact",
+                    title=contact_section[0],
+                    text="\n".join(deduplicate_preserving_order(contact_section[1:])),
+                    source_type="contact",
+                    source_url=self.site_url,
+                    metadata=metadata,
+                )
+            )
+
+        return documents
+
+    async def scrape(self) -> list[KnowledgeDocument]:
+        html = await self.fetch_homepage()
+        return self.parse_homepage(html)
diff --git a/rag_api/app/vector_store.py b/rag_api/app/vector_store.py
new file mode 100644
index 0000000..f5bd19e
--- /dev/null
+++ b/rag_api/app/vector_store.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+
+from chromadb import PersistentClient
+from chromadb.api.models.Collection import Collection
+from chromadb.config import Settings as ChromaSettings
+
+from .config import settings
+
+
+class VectorStore:
+    def __init__(self) -> None:
+        chroma_settings = ChromaSettings(
+            anonymized_telemetry=False,
+            chroma_product_telemetry_impl="app.noop_telemetry.NoOpProductTelemetry",
+            chroma_telemetry_impl="app.noop_telemetry.NoOpProductTelemetry",
+        )
+        self.client = PersistentClient(
+            path=settings.chroma_path,
+            settings=chroma_settings,
+        )
+
+    def get_collection(self, name: str) -> Collection:
+        return self.client.get_or_create_collection(name=name)
diff --git a/rag_api/requirements.txt b/rag_api/requirements.txt
new file mode 100644
index 0000000..5541b54
--- /dev/null
+++ b/rag_api/requirements.txt
@@ -0,0 +1,7 @@
+beautifulsoup4==4.12.3
+chromadb==1.0.8
+fastapi==0.115.9
+httpx==0.28.1
+pydantic==2.11.4
+transformers==4.57.1
+uvicorn==0.34.2