Spaces:

JJTsao
/

rag-movie-api

Running

rag-movie-api/app/llm/custom_models.py

JJ Tsao

API update

1005046 about 2 months ago

2.59 kB

	import time
	from pathlib import Path

	import joblib
	import torch
	from app.core.config import BM25_PATH, EMBEDDING_MODEL, INTENT_MODEL
	from rank_bm25 import BM25Okapi
	from sentence_transformers import SentenceTransformer
	from transformers import pipeline

	# === Model Config ===
	# Module-level cache for the embedding model. It starts as None and is
	# assigned by load_sentence_model() the first time that function runs.
	_sentence_model = None # Not loaded at import time


	def load_sentence_model():
	    """Return the shared embedding model, creating it on the first call.

	    On the first call the model named by ``EMBEDDING_MODEL`` is instantiated
	    on CUDA when ``torch.cuda.is_available()`` is true (otherwise on CPU),
	    stored in the module-level ``_sentence_model`` cache, and then run twice
	    over a small batch of sample sentences as a warmup. Later calls return
	    the cached instance without reloading or warming up again.

	    Returns:
	        The cached ``SentenceTransformer`` instance.
	    """
	    global _sentence_model

	    # Fast path: the model was already built by an earlier call.
	    if _sentence_model is not None:
	        return _sentence_model

	    print("Loading embedding model...")
	    target_device = "cuda" if torch.cuda.is_available() else "cpu"
	    # Assign to the cache before warmup so the instance is kept even if a
	    # warmup pass fails.
	    _sentence_model = SentenceTransformer(EMBEDDING_MODEL, device=target_device)

	    print(f"Model '{EMBEDDING_MODEL}' loaded. Performing GPU warmup...")

	    # Representative multi-sentence batch; the intent is to pay one-time
	    # initialization costs here instead of on the first real request.
	    warmup_batch = [
	        "A suspenseful thriller with deep character development and moral ambiguity.",
	        "Coming-of-age story with emotional storytelling and strong ensemble performances.",
	        "Mind-bending sci-fi with philosophical undertones and high concept ideas.",
	        "Recommend me some comedies.",
	    ]

	    # Two encode passes separated by a short pause; results are discarded.
	    _sentence_model.encode(warmup_batch, show_progress_bar=False)
	    time.sleep(0.5)
	    _sentence_model.encode(warmup_batch, show_progress_bar=False)
	    print("🚀 Embedding model fully warmed up.")

	    return _sentence_model


	def setup_intent_classifier():
	    """Build the intent classifier pipeline and run a few warmup queries.

	    Creates a ``text-classification`` pipeline from ``INTENT_MODEL``, calls
	    it once on each of three sample queries (results are discarded), and
	    returns the pipeline. A new pipeline is created on every call; nothing
	    is cached here.

	    Returns:
	        The pipeline object returned by ``transformers.pipeline``.
	    """
	    print(f"Loading intent classifier from {INTENT_MODEL}")
	    intent_pipeline = pipeline("text-classification", model=INTENT_MODEL)

	    print("Warming up intent classifier...")
	    sample_queries = (
	        "Can you recommend a feel-good movie?",
	        "Who directed The Godfather?",
	        "Do you like action films?",
	    )
	    # One call per query, in order; only the side effect of running matters.
	    for query in sample_queries:
	        intent_pipeline(query)

	    print("🤖 Classifier ready")
	    return intent_pipeline


	def load_bm25_files() -> tuple[dict[str, BM25Okapi], dict[str, int]]:
	    """Load the BM25 model and vocabulary files for movies and TV.

	    Reads four joblib files from the directory given by ``BM25_PATH``:
	    ``movie_bm25_model.joblib``, ``tv_bm25_model.joblib``,
	    ``movie_bm25_vocab.joblib`` and ``tv_bm25_vocab.joblib`` (in that order).

	    Returns:
	        A ``(bm25_models, bm25_vocabs)`` tuple. Both are dicts keyed by
	        ``"movie"`` and ``"tv"`` whose values are whatever ``joblib.load``
	        returns for the corresponding file.
	        NOTE(review): the return annotation says the vocab values are
	        ``int``; confirm against the code that writes these files.

	    Raises:
	        FileNotFoundError: If any of the four files is missing. The message
	            is prefixed with ``"Missing BM25 files: "`` and the original
	            error is attached as ``__cause__``.
	    """
	    bm25_dir = Path(BM25_PATH)
	    # NOTE: joblib.load unpickles its input, so BM25_PATH must only point at
	    # trusted files.
	    try:
	        bm25_models = {
	            "movie": joblib.load(bm25_dir / "movie_bm25_model.joblib"),
	            "tv": joblib.load(bm25_dir / "tv_bm25_model.joblib"),
	        }
	        bm25_vocabs = {
	            "movie": joblib.load(bm25_dir / "movie_bm25_vocab.joblib"),
	            "tv": joblib.load(bm25_dir / "tv_bm25_vocab.joblib"),
	        }
	    except FileNotFoundError as e:
	        # Chain explicitly so the traceback shows the original error (with
	        # its filename/errno) as the direct cause of this one.
	        raise FileNotFoundError(f"Missing BM25 files: {e}") from e
	    else:
	        # Report success only after every file has loaded.
	        print("✅ BM25 files loaded")
	    return bm25_models, bm25_vocabs


	def embed_text(text: str) -> list[float]:
	    """Encode *text* with the shared embedding model and return it as a list.

	    The model comes from ``load_sentence_model()``, so the first call may
	    also load and warm up the model. The value returned by ``encode`` is
	    converted with ``.tolist()`` before being returned.
	    """
	    embedding = load_sentence_model().encode(text)
	    return embedding.tolist()