first commit

2026-05-12 23:37:04 +03:00
commit aff0bc2990
67 changed files with 3984 additions and 0 deletions
@@ -0,0 +1,214 @@
+from __future__ import annotations
+
+import json
+import re
+from pathlib import Path
+
+from .config import settings
+from .models import MenuItem, MenuSnapshot
+
+
+def tokenize(value: str) -> list[str]:
+    raw_tokens = re.findall(r"[a-zA-Zа-яА-Я0-9]+", value.lower())
+    return [
+        token
+        for token in raw_tokens
+        if token not in QUERY_STOPWORDS and (len(token) > 2 or token.isdigit())
+    ]
+
+
+QUERY_STOPWORDS = {
+    "что",
+    "у",
+    "вас",
+    "есть",
+    "из",
+    "как",
+    "ли",
+    "мне",
+    "могу",
+    "хочу",
+    "надо",
+    "для",
+    "под",
+    "про",
+    "или",
+    "это",
+    "эта",
+    "этот",
+    "какой",
+    "какая",
+    "какие",
+    "посоветуй",
+    "посоветуйте",
+    "подбери",
+    "подобрать",
+    "вкусную",
+    "вкусный",
+    "вкусное",
+}
+
+
+QUERY_HINTS = {
+    "шаурма": ["шаурма", "классика"],
+    "шаурмы": ["шаурма", "классика"],
+    "шаверма": ["шаурма", "классика"],
+    "шавуха": ["шаурма", "классика"],
+    "острый": ["халапеньо", "шрирача", "том", "ям"],
+    "острая": ["халапеньо", "шрирача", "том", "ям"],
+    "острое": ["халапеньо", "шрирача", "том", "ям"],
+    "острого": ["халапеньо", "шрирача", "том", "ям"],
+    "пикантный": ["халапеньо", "шрирача", "том", "ям"],
+    "сыр": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
+    "сыром": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
+    "сыра": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
+    "сырный": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
+    "сырная": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
+    "рыбный": ["лосось"],
+    "рыбная": ["лосось"],
+    "мясной": ["свинина", "курица", "ростбиф", "колбаски", "пепперони"],
+    "мясная": ["свинина", "курица", "ростбиф", "колбаски", "пепперони"],
+}
+
+CATEGORY_ALIASES = {
+    "шаурмы": "шаурма",
+    "шаверма": "шаурма",
+    "шавуха": "шаурма",
+}
+
+
+class MenuCatalog:
+    def __init__(self) -> None:
+        self.snapshot_path = Path(settings.menu_snapshot_path)
+
+    def exists(self) -> bool:
+        return self.snapshot_path.exists()
+
+    def load_snapshot(self) -> MenuSnapshot:
+        data = json.loads(self.snapshot_path.read_text(encoding="utf-8"))
+        return MenuSnapshot.model_validate(data)
+
+    def menu_documents(self) -> list[tuple[MenuItem, str]]:
+        if not self.exists():
+            return []
+
+        snapshot = self.load_snapshot()
+        documents: list[tuple[MenuItem, str]] = []
+        for item in snapshot.items:
+            text = " | ".join(
+                [
+                    item.name,
+                    item.category,
+                    item.description,
+                    ", ".join(item.ingredients),
+                    item.size or "",
+                    item.price_label,
+                ]
+            )
+            documents.append((item, text))
+        return documents
+
+    def items_map(self) -> dict[str, MenuItem]:
+        if not self.exists():
+            return {}
+
+        snapshot = self.load_snapshot()
+        return {item.item_id: item for item in snapshot.items}
+
+    def search(
+        self,
+        query: str = "",
+        max_price: int | None = None,
+        category: str | None = None,
+        must_include: list[str] | None = None,
+        must_not_include: list[str] | None = None,
+        limit: int = 5,
+        candidate_ids: list[str] | None = None,
+        semantic_ranks: dict[str, int] | None = None,
+    ) -> list[dict[str, object]]:
+        if not self.exists():
+            return []
+
+        must_include = [value.lower() for value in (must_include or [])]
+        must_not_include = [value.lower() for value in (must_not_include or [])]
+        query_tokens = tokenize(query)
+        normalized_category = category.lower() if category else None
+        if normalized_category in CATEGORY_ALIASES:
+            normalized_category = CATEGORY_ALIASES[normalized_category]
+        hint_tokens = []
+        for token in query_tokens:
+            hint_tokens.extend(QUERY_HINTS.get(token, []))
+        candidate_set = set(candidate_ids or [])
+        semantic_ranks = semantic_ranks or {}
+
+        scored_items: list[tuple[int, MenuItem]] = []
+        for item, text in self.menu_documents():
+            if candidate_set and item.item_id not in candidate_set:
+                continue
+
+            lowered = text.lower()
+
+            if normalized_category and item.category.lower() != normalized_category:
+                continue
+            if max_price is not None and item.price is not None and item.price > max_price:
+                continue
+            if max_price is not None and item.price is None:
+                continue
+            if any(value not in lowered for value in must_include):
+                continue
+            if any(value in lowered for value in must_not_include):
+                continue
+
+            score = 0
+            for token in query_tokens:
+                if token in lowered:
+                    score += 3
+                if token in item.name.lower():
+                    score += 5
+
+            for token in hint_tokens:
+                if token in lowered:
+                    score += 6
+                if token == item.category.lower():
+                    score += 8
+
+            for token in must_include:
+                if token in lowered:
+                    score += 4
+
+            if item.item_id in semantic_ranks:
+                score += max(0, 20 - semantic_ranks[item.item_id])
+
+            if not query_tokens and not must_include and category:
+                score += 1
+
+            scored_items.append((score, item))
+
+        scored_items.sort(
+            key=lambda row: (
+                row[0],
+                -(row[1].price or 0),
+                row[1].name,
+            ),
+            reverse=True,
+        )
+
+        results: list[dict[str, object]] = []
+        for score, item in scored_items[:limit]:
+            results.append(
+                {
+                    "item_id": item.item_id,
+                    "name": item.name,
+                    "category": item.category,
+                    "description": item.description,
+                    "ingredients": item.ingredients,
+                    "price": item.price,
+                    "price_label": item.price_label,
+                    "size": item.size,
+                    "photo_url": item.photo_url,
+                    "source_url": item.source_url,
+                    "score": score,
+                }
+            )
+
+        return results