Spaces:

JJTsao
/

rag-movie-api

Running

App Files Files Community

rag-movie-api / app /llm_services.py

JJTsao

Upload llm_services.py

a036209 verified 3 months ago

raw

history blame

3.73 kB

	import time

	import torch
	from openai import OpenAI
	from sentence_transformers import SentenceTransformer
	from app.config import EMBEDDING_MODEL, OPENAI_MODEL, OPENAI_API_KEY

	# === LLM Config ===
	# Module-level cache for the embedding model. It starts as None and is
	# populated by load_sentence_model() the first time that function runs.
	_sentence_model = None # Not loaded at import time

	# === Clients ===
	# OpenAI client shared by this module; constructed once, when the module is
	# imported, using the API key taken from app.config.
	openai_client = OpenAI(api_key=OPENAI_API_KEY)

	# === System Prompt ===
	# Sent as the "system" message at the head of every request built by
	# call_chat_model_openai(). It fixes the reply layout: a short intro closed
	# by the <!-- END_INTRO --> token, one block per title closed by
	# <!-- END_MOVIE -->, then a closing paragraph.
	# NOTE(review): presumably a downstream consumer splits the streamed text on
	# those tokens -- confirm before changing the wording of this prompt.
	SYSTEM_PROMPT = """
	You are a professional film curator and critic. Your role is to analyze the user's preferences and recommend high-quality films or TV shows using only the provided list.

	Focus on:

	- Artistic merit and storytelling
	- Genres, themes, tone, and emotional resonance
	- IMDB and Rotten Tomatoes ratings
	- Strong character-driven or thematically rich selections

	### Response Format (in markdown):

	1. Start with a concise 2 sentences opening paragraph that contextualizes the theme and the overall viewing experience the user is seeking. At the end of this paragraph, insert the token: <!-- END_INTRO -->.

	2. Then, for each recommendation, use the following format (repeat for each title). At the end of each movie recommendation block, insert the token: <!-- END_MOVIE -->:

	```
	### <Number>. <Movie Title>
	- POSTER_PATH: /abc123.jpg
	- BACKDROP_PATH: /abc123.jpg
	- GENRES: Genre1, Genre2, ...
	- IMDB_RATING: X.X
	- ROTTEN_TOMATOES_RATING: XX%
	- TRAILER_KEY: abc123
	- WHY_YOU_MIGHT_ENJOY_IT: <Short paragraph explaining the appeal based on character, themes, tone, and relevance to the user's intent.>
	<!-- END_MOVIE -->
	```

	3. End with a brief closing paragraph that summarizes the emotional or intellectual throughline across the recommendations, and affirms their alignment with the user's preferences.

	Write in Markdown only. Be concise, authoritative, and avoid overly generic statements. Each "Why You Might Enjoy It" should be specific and grounded in the movie’s themes, storytelling, or cultural relevance.
	"""


	def load_sentence_model():
	    """Return the shared embedding model, building and warming it up on first use.

	    The first call constructs a ``SentenceTransformer`` for ``EMBEDDING_MODEL``
	    (on CUDA when ``torch`` reports it available, otherwise on CPU), stores it
	    in the module-level ``_sentence_model`` cache, and runs two warm-up
	    encodes. Every later call returns the cached instance immediately.

	    Returns:
	        The cached ``SentenceTransformer`` instance.
	    """
	    global _sentence_model

	    # Fast path: an earlier call already built the model.
	    if _sentence_model is not None:
	        return _sentence_model

	    print("⏳ Loading embedding model...")
	    if torch.cuda.is_available():
	        target_device = "cuda"
	    else:
	        target_device = "cpu"
	    # Publish to the module cache before warming up, so the instance is
	    # retained even if a warm-up encode fails.
	    _sentence_model = SentenceTransformer(EMBEDDING_MODEL, device=target_device)

	    print(f"🔥 Model '{EMBEDDING_MODEL}' loaded. Performing GPU warmup...")

	    # Warm-up: encode a few representative queries twice, with a short pause
	    # in between, before the model is handed to any caller.
	    sample_queries = [
	        "A suspenseful thriller with deep character development and moral ambiguity.",
	        "Coming-of-age story with emotional storytelling and strong ensemble performances.",
	        "Mind-bending sci-fi with philosophical undertones and high concept ideas.",
	        "Recommend me some comedies.",
	    ]
	    _sentence_model.encode(sample_queries, show_progress_bar=False)
	    time.sleep(0.5)
	    _sentence_model.encode(sample_queries, show_progress_bar=False)
	    print("🚀 Embedding model fully warmed up.")

	    return _sentence_model


	def embed_text(text: str) -> list[float]:
	    """Encode ``text`` with the shared sentence model and return it as a plain list.

	    The model is obtained through ``load_sentence_model()``, so the first call
	    also pays for loading it.
	    """
	    encoder = load_sentence_model()
	    vector = encoder.encode(text)
	    return vector.tolist()


	def build_chat_history(history: list, max_turns: int = 5) -> list:
	    """Convert the most recent chat messages into role/content dicts.

	    Args:
	        history: Message objects exposing ``.role`` and ``.content``
	            attributes, oldest first. ``None`` or an empty list yields ``[]``.
	        max_turns: Number of trailing turns to keep; one turn is counted as
	            two messages, so at most ``max_turns * 2`` messages are returned.
	            Zero or a negative value yields ``[]``.

	    Returns:
	        A list of ``{"role": ..., "content": ...}`` dicts in the original order.
	    """
	    # Guard first: ``history[-0:]`` is ``history[0:]``, so without this check
	    # ``max_turns=0`` would return the whole history instead of nothing, and a
	    # negative value would slice from the front.
	    if not history or max_turns <= 0:
	        return []

	    recent = history[-max_turns * 2:]
	    return [{"role": msg.role, "content": msg.content} for msg in recent]



	def call_chat_model_openai(history, user_message: str):
	    """Stream the chat model's reply to ``user_message`` as text fragments.

	    The request is assembled as: the ``SYSTEM_PROMPT`` system message, the
	    trimmed prior conversation from ``build_chat_history``, then the new user
	    message. It is sent to ``OPENAI_MODEL`` with ``temperature=0.7`` and
	    ``stream=True``.

	    Args:
	        history: Prior messages (objects with ``.role`` / ``.content``), or a
	            falsy value when there is no earlier conversation.
	        user_message: The new user input to append to the conversation.

	    Yields:
	        Each non-empty content fragment, in the order it is received.
	    """
	    messages = [
	        {"role": "system", "content": SYSTEM_PROMPT},
	        *build_chat_history(history or []),
	        {"role": "user", "content": user_message},
	    ]

	    response = openai_client.chat.completions.create(
	        model=OPENAI_MODEL, messages=messages, temperature=0.7, stream=True
	    )

	    for chunk in response:
	        # A streamed chunk may carry an empty ``choices`` list (for example a
	        # usage-only final chunk, or chunks from some OpenAI-compatible
	        # endpoints); indexing [0] on it would raise IndexError mid-stream.
	        if not chunk.choices:
	            continue
	        delta = chunk.choices[0].delta.content
	        # Role-only and terminating chunks have no content; skip them.
	        if delta:
	            yield delta