"""Keyword search over the stored menu snapshot.

The module tokenizes a free-text query, expands it with hand-written hint
words, and ranks the menu items of the snapshot file by a simple additive
score.
"""

from __future__ import annotations

import json
import re
from pathlib import Path

from .config import settings
from .models import MenuItem, MenuSnapshot

# Words removed from a query before scoring.
QUERY_STOPWORDS = {
    "что",
    "у",
    "вас",
    "есть",
    "из",
    "как",
    "ли",
    "мне",
    "могу",
    "хочу",
    "надо",
    "для",
    "под",
    "про",
    "или",
    "это",
    "эта",
    "этот",
    "какой",
    "какая",
    "какие",
    "посоветуй",
    "посоветуйте",
    "подбери",
    "подобрать",
    "вкусную",
    "вкусный",
    "вкусное",
}

# Query token -> extra words that are also looked up in the item text.
QUERY_HINTS = {
    "шаурма": ["шаурма", "классика"],
    "шаурмы": ["шаурма", "классика"],
    "шаверма": ["шаурма", "классика"],
    "шавуха": ["шаурма", "классика"],
    "острый": ["халапеньо", "шрирача", "том", "ям"],
    "острая": ["халапеньо", "шрирача", "том", "ям"],
    "острое": ["халапеньо", "шрирача", "том", "ям"],
    "острого": ["халапеньо", "шрирача", "том", "ям"],
    "пикантный": ["халапеньо", "шрирача", "том", "ям"],
    "сыр": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
    "сыром": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
    "сыра": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
    "сырный": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
    "сырная": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
    "рыбный": ["лосось"],
    "рыбная": ["лосось"],
    "мясной": ["свинина", "курица", "ростбиф", "колбаски", "пепперони"],
    "мясная": ["свинина", "курица", "ростбиф", "колбаски", "пепперони"],
}

# Alternative spellings of a category -> the spelling compared against items.
CATEGORY_ALIASES = {
    "шаурмы": "шаурма",
    "шаверма": "шаурма",
    "шавуха": "шаурма",
}

# "ё"/"Ё" sit outside the а-я / А-Я code point ranges, so they are listed
# explicitly; without them a word such as "копчёный" is split in two.
_TOKEN_RE = re.compile(r"[a-zA-Zа-яА-ЯёЁ0-9]+")


def tokenize(value: str) -> list[str]:
    """Split *value* into lowercase search tokens.

    Tokens are runs of Latin letters, Cyrillic letters and digits. Stopwords
    are dropped, and so are tokens of one or two characters unless they
    consist of digits only.
    """
    return [
        token
        for token in _TOKEN_RE.findall(value.lower())
        if token not in QUERY_STOPWORDS and (len(token) > 2 or token.isdigit())
    ]


class MenuCatalog:
    """Read-only access to the menu snapshot file, plus keyword search."""

    def __init__(self) -> None:
        # Location of the snapshot JSON file, taken from the settings object.
        self.snapshot_path = Path(settings.menu_snapshot_path)

    def exists(self) -> bool:
        """Return True if the snapshot file is present on disk."""
        return self.snapshot_path.exists()

    def load_snapshot(self) -> MenuSnapshot:
        """Read the snapshot file as UTF-8 JSON and validate it.

        Raises:
            OSError: if the file cannot be read (e.g. it does not exist).
            json.JSONDecodeError: if the file is not valid JSON.

        NOTE(review): ``model_validate`` presumably raises its own
        validation error for malformed data -- confirm against ``.models``.
        """
        data = json.loads(self.snapshot_path.read_text(encoding="utf-8"))
        return MenuSnapshot.model_validate(data)

    def menu_documents(self) -> list[tuple[MenuItem, str]]:
        """Return every item paired with its searchable text.

        The text is the item's name, category, description, comma-joined
        ingredients, size (empty when missing) and price label, joined with
        " | ". Returns an empty list when the snapshot file does not exist.
        """
        if not self.exists():
            return []

        snapshot = self.load_snapshot()
        documents: list[tuple[MenuItem, str]] = []
        for item in snapshot.items:
            text = " | ".join(
                [
                    item.name,
                    item.category,
                    item.description,
                    ", ".join(item.ingredients),
                    item.size or "",
                    item.price_label,
                ]
            )
            documents.append((item, text))
        return documents

    def items_map(self) -> dict[str, MenuItem]:
        """Return the snapshot items keyed by ``item_id``.

        Returns an empty dict when the snapshot file does not exist.
        """
        if not self.exists():
            return {}

        snapshot = self.load_snapshot()
        return {item.item_id: item for item in snapshot.items}

    def search(
        self,
        query: str = "",
        max_price: int | None = None,
        category: str | None = None,
        must_include: list[str] | None = None,
        must_not_include: list[str] | None = None,
        limit: int = 5,
        candidate_ids: list[str] | None = None,
        semantic_ranks: dict[str, int] | None = None,
    ) -> list[dict[str, object]]:
        """Filter and rank menu items for a free-text query.

        Args:
            query: Free text; tokenized with :func:`tokenize`.
            max_price: When set, items priced above it are skipped, and so
                are items that have no price.
            category: When set, only items whose lowercased category equals
                it (after ``CATEGORY_ALIASES`` mapping) are kept.
            must_include: Substrings that must all occur in the item text
                (case-insensitive).
            must_not_include: Substrings that must not occur in the item
                text (case-insensitive).
            limit: Maximum number of results; values below zero are treated
                as zero.
            candidate_ids: When non-empty, only items with these ids are
                considered. ``None`` and an empty list both mean "no
                restriction".
            semantic_ranks: Optional ``item_id -> rank`` mapping; an item
                found in it gains ``max(0, 20 - rank)`` points.

        Returns:
            Up to ``limit`` plain dicts describing the items, each with its
            ``score``, ordered by score (highest first), then price (lowest
            first, a missing price counting as 0), then name (A to Z).
            Items that pass the filters are returned even with a score of 0.
            An empty list is returned when the snapshot file does not exist.
        """
        if not self.exists():
            return []

        must_include = [value.lower() for value in (must_include or [])]
        must_not_include = [value.lower() for value in (must_not_include or [])]
        query_tokens = tokenize(query)

        normalized_category = category.lower() if category else None
        if normalized_category in CATEGORY_ALIASES:
            normalized_category = CATEGORY_ALIASES[normalized_category]

        hint_tokens: list[str] = []
        for token in query_tokens:
            hint_tokens.extend(QUERY_HINTS.get(token, []))

        candidate_set = set(candidate_ids or [])
        semantic_ranks = semantic_ranks or {}
        # Small bonus for a plain "browse this category" request.
        browse_bonus = 1 if (not query_tokens and not must_include and category) else 0

        scored_items: list[tuple[int, MenuItem]] = []
        for item, text in self.menu_documents():
            if candidate_set and item.item_id not in candidate_set:
                continue

            category_lower = item.category.lower()
            if normalized_category and category_lower != normalized_category:
                continue
            if max_price is not None and (item.price is None or item.price > max_price):
                continue

            lowered = text.lower()
            if any(value not in lowered for value in must_include):
                continue
            if any(value in lowered for value in must_not_include):
                continue

            name_lower = item.name.lower()
            score = 0
            for token in query_tokens:
                if token in lowered:
                    score += 3
                # The name is part of the text, so a name hit scores 3 + 5.
                if token in name_lower:
                    score += 5
            for token in hint_tokens:
                if token in lowered:
                    score += 6
                if token == category_lower:
                    score += 8
            for token in must_include:
                if token in lowered:
                    score += 4
            if item.item_id in semantic_ranks:
                score += max(0, 20 - semantic_ranks[item.item_id])
            score += browse_bonus

            scored_items.append((score, item))

        # Negating the score (instead of reverse=True on the whole key) keeps
        # the name tie-break in natural A-to-Z order.
        scored_items.sort(
            key=lambda row: (-row[0], row[1].price or 0, row[1].name),
        )

        results: list[dict[str, object]] = []
        # Clamp so that a negative limit cannot slice items off the tail.
        for score, item in scored_items[: max(limit, 0)]:
            results.append(
                {
                    "item_id": item.item_id,
                    "name": item.name,
                    "category": item.category,
                    "description": item.description,
                    "ingredients": item.ingredients,
                    "price": item.price,
                    "price_label": item.price_label,
                    "size": item.size,
                    "photo_url": item.photo_url,
                    "source_url": item.source_url,
                    "score": score,
                }
            )
        return results