first commit
This commit is contained in:
@@ -0,0 +1,214 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from .config import settings
|
||||
from .models import MenuItem, MenuSnapshot
|
||||
|
||||
|
||||
def tokenize(value: str) -> list[str]:
|
||||
raw_tokens = re.findall(r"[a-zA-Zа-яА-Я0-9]+", value.lower())
|
||||
return [
|
||||
token
|
||||
for token in raw_tokens
|
||||
if token not in QUERY_STOPWORDS and (len(token) > 2 or token.isdigit())
|
||||
]
|
||||
|
||||
|
||||
QUERY_STOPWORDS = {
|
||||
"что",
|
||||
"у",
|
||||
"вас",
|
||||
"есть",
|
||||
"из",
|
||||
"как",
|
||||
"ли",
|
||||
"мне",
|
||||
"могу",
|
||||
"хочу",
|
||||
"надо",
|
||||
"для",
|
||||
"под",
|
||||
"про",
|
||||
"или",
|
||||
"это",
|
||||
"эта",
|
||||
"этот",
|
||||
"какой",
|
||||
"какая",
|
||||
"какие",
|
||||
"посоветуй",
|
||||
"посоветуйте",
|
||||
"подбери",
|
||||
"подобрать",
|
||||
"вкусную",
|
||||
"вкусный",
|
||||
"вкусное",
|
||||
}
|
||||
|
||||
|
||||
QUERY_HINTS = {
|
||||
"шаурма": ["шаурма", "классика"],
|
||||
"шаурмы": ["шаурма", "классика"],
|
||||
"шаверма": ["шаурма", "классика"],
|
||||
"шавуха": ["шаурма", "классика"],
|
||||
"острый": ["халапеньо", "шрирача", "том", "ям"],
|
||||
"острая": ["халапеньо", "шрирача", "том", "ям"],
|
||||
"острое": ["халапеньо", "шрирача", "том", "ям"],
|
||||
"острого": ["халапеньо", "шрирача", "том", "ям"],
|
||||
"пикантный": ["халапеньо", "шрирача", "том", "ям"],
|
||||
"сыр": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
|
||||
"сыром": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
|
||||
"сыра": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
|
||||
"сырный": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
|
||||
"сырная": ["сыр", "моцарелла", "пармезан", "крем", "чиз"],
|
||||
"рыбный": ["лосось"],
|
||||
"рыбная": ["лосось"],
|
||||
"мясной": ["свинина", "курица", "ростбиф", "колбаски", "пепперони"],
|
||||
"мясная": ["свинина", "курица", "ростбиф", "колбаски", "пепперони"],
|
||||
}
|
||||
|
||||
CATEGORY_ALIASES = {
|
||||
"шаурмы": "шаурма",
|
||||
"шаверма": "шаурма",
|
||||
"шавуха": "шаурма",
|
||||
}
|
||||
|
||||
|
||||
class MenuCatalog:
|
||||
def __init__(self) -> None:
|
||||
self.snapshot_path = Path(settings.menu_snapshot_path)
|
||||
|
||||
def exists(self) -> bool:
|
||||
return self.snapshot_path.exists()
|
||||
|
||||
def load_snapshot(self) -> MenuSnapshot:
|
||||
data = json.loads(self.snapshot_path.read_text(encoding="utf-8"))
|
||||
return MenuSnapshot.model_validate(data)
|
||||
|
||||
def menu_documents(self) -> list[tuple[MenuItem, str]]:
|
||||
if not self.exists():
|
||||
return []
|
||||
|
||||
snapshot = self.load_snapshot()
|
||||
documents: list[tuple[MenuItem, str]] = []
|
||||
for item in snapshot.items:
|
||||
text = " | ".join(
|
||||
[
|
||||
item.name,
|
||||
item.category,
|
||||
item.description,
|
||||
", ".join(item.ingredients),
|
||||
item.size or "",
|
||||
item.price_label,
|
||||
]
|
||||
)
|
||||
documents.append((item, text))
|
||||
return documents
|
||||
|
||||
def items_map(self) -> dict[str, MenuItem]:
|
||||
if not self.exists():
|
||||
return {}
|
||||
|
||||
snapshot = self.load_snapshot()
|
||||
return {item.item_id: item for item in snapshot.items}
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str = "",
|
||||
max_price: int | None = None,
|
||||
category: str | None = None,
|
||||
must_include: list[str] | None = None,
|
||||
must_not_include: list[str] | None = None,
|
||||
limit: int = 5,
|
||||
candidate_ids: list[str] | None = None,
|
||||
semantic_ranks: dict[str, int] | None = None,
|
||||
) -> list[dict[str, object]]:
|
||||
if not self.exists():
|
||||
return []
|
||||
|
||||
must_include = [value.lower() for value in (must_include or [])]
|
||||
must_not_include = [value.lower() for value in (must_not_include or [])]
|
||||
query_tokens = tokenize(query)
|
||||
normalized_category = category.lower() if category else None
|
||||
if normalized_category in CATEGORY_ALIASES:
|
||||
normalized_category = CATEGORY_ALIASES[normalized_category]
|
||||
hint_tokens = []
|
||||
for token in query_tokens:
|
||||
hint_tokens.extend(QUERY_HINTS.get(token, []))
|
||||
candidate_set = set(candidate_ids or [])
|
||||
semantic_ranks = semantic_ranks or {}
|
||||
|
||||
scored_items: list[tuple[int, MenuItem]] = []
|
||||
for item, text in self.menu_documents():
|
||||
if candidate_set and item.item_id not in candidate_set:
|
||||
continue
|
||||
|
||||
lowered = text.lower()
|
||||
|
||||
if normalized_category and item.category.lower() != normalized_category:
|
||||
continue
|
||||
if max_price is not None and item.price is not None and item.price > max_price:
|
||||
continue
|
||||
if max_price is not None and item.price is None:
|
||||
continue
|
||||
if any(value not in lowered for value in must_include):
|
||||
continue
|
||||
if any(value in lowered for value in must_not_include):
|
||||
continue
|
||||
|
||||
score = 0
|
||||
for token in query_tokens:
|
||||
if token in lowered:
|
||||
score += 3
|
||||
if token in item.name.lower():
|
||||
score += 5
|
||||
|
||||
for token in hint_tokens:
|
||||
if token in lowered:
|
||||
score += 6
|
||||
if token == item.category.lower():
|
||||
score += 8
|
||||
|
||||
for token in must_include:
|
||||
if token in lowered:
|
||||
score += 4
|
||||
|
||||
if item.item_id in semantic_ranks:
|
||||
score += max(0, 20 - semantic_ranks[item.item_id])
|
||||
|
||||
if not query_tokens and not must_include and category:
|
||||
score += 1
|
||||
|
||||
scored_items.append((score, item))
|
||||
|
||||
scored_items.sort(
|
||||
key=lambda row: (
|
||||
row[0],
|
||||
-(row[1].price or 0),
|
||||
row[1].name,
|
||||
),
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
results: list[dict[str, object]] = []
|
||||
for score, item in scored_items[:limit]:
|
||||
results.append(
|
||||
{
|
||||
"item_id": item.item_id,
|
||||
"name": item.name,
|
||||
"category": item.category,
|
||||
"description": item.description,
|
||||
"ingredients": item.ingredients,
|
||||
"price": item.price,
|
||||
"price_label": item.price_label,
|
||||
"size": item.size,
|
||||
"photo_url": item.photo_url,
|
||||
"source_url": item.source_url,
|
||||
"score": score,
|
||||
}
|
||||
)
|
||||
|
||||
return results
|
||||
Reference in New Issue
Block a user