"""LLM helpers: lazy sentence-embedding model, chat-history shaping, and
streaming OpenAI chat completions for the film-recommendation assistant."""

import time

import torch
from openai import OpenAI
from sentence_transformers import SentenceTransformer

from app.config import EMBEDDING_MODEL, OPENAI_MODEL, OPENAI_API_KEY

# === LLM Config ===
# Lazily initialized by load_sentence_model(); deliberately NOT loaded at
# import time so app startup stays fast and CUDA init happens on first use.
_sentence_model = None

# === Clients ===
openai_client = OpenAI(api_key=OPENAI_API_KEY)

# === System Prompt ===
# NOTE(review): the "insert the token: ." instructions appear to have lost
# their actual token text (likely stripped during an earlier edit) — confirm
# the intended sentinel tokens with the frontend parser.
SYSTEM_PROMPT = """
You are a professional film curator and critic. Your role is to analyze the user's preferences and recommend high-quality films or TV shows using only the provided list.

Focus on:
- Artistic merit and storytelling
- Genres, themes, tone, and emotional resonance
- IMDB and Rotten Tomatoes ratings
- Strong character-driven or thematically rich selections

### Response Format (in markdown):
1. Start with a concise 2 sentences **opening paragraph** that contextualizes the theme and the overall viewing experience the user is seeking. At the end of this paragraph, insert the token: .
2. Then, for each recommendation, use the following format (repeat for each title). At the end of each movie recommendation block, insert the token: :

```
### .
- POSTER_PATH: /abc123.jpg
- BACKDROP_PATH: /abc123.jpg
- GENRES: Genre1, Genre2, ...
- IMDB_RATING: X.X
- ROTTEN_TOMATOES_RATING: XX%
- TRAILER_KEY: abc123
- WHY_YOU_MIGHT_ENJOY_IT:
```

3. End with a brief **closing paragraph** that summarizes the emotional or intellectual throughline across the recommendations, and affirms their alignment with the user's preferences.

Write in **Markdown** only. Be concise, authoritative, and avoid overly generic statements. Each "Why You Might Enjoy It" should be specific and grounded in the movie’s themes, storytelling, or cultural relevance.
"""


def load_sentence_model():
    """Return the shared SentenceTransformer, loading and warming it up on first call.

    The model is cached in the module-level ``_sentence_model`` so the
    (expensive) load + CUDA warmup happens at most once per process.
    """
    global _sentence_model
    if _sentence_model is None:
        print("⏳ Loading embedding model...")
        _sentence_model = SentenceTransformer(
            EMBEDDING_MODEL,
            device="cuda" if torch.cuda.is_available() else "cpu",
        )
        print(f"🔥 Model '{EMBEDDING_MODEL}' loaded. Performing GPU warmup...")
        # Realistic multi-sentence warmup to trigger full CUDA graph
        warmup_sentences = [
            "A suspenseful thriller with deep character development and moral ambiguity.",
            "Coming-of-age story with emotional storytelling and strong ensemble performances.",
            "Mind-bending sci-fi with philosophical undertones and high concept ideas.",
            "Recommend me some comedies.",
        ]
        # Two encode passes with a short pause in between so kernel
        # autotuning/graph capture settles before real traffic arrives.
        _ = _sentence_model.encode(warmup_sentences, show_progress_bar=False)
        time.sleep(0.5)
        _ = _sentence_model.encode(warmup_sentences, show_progress_bar=False)
        print("🚀 Embedding model fully warmed up.")
    return _sentence_model


def embed_text(text: str) -> list[float]:
    """Embed *text* with the shared sentence model and return a plain float list."""
    model = load_sentence_model()
    return model.encode(text).tolist()


def build_chat_history(history: list, max_turns: int = 5) -> list:
    """Convert the last *max_turns* exchanges into OpenAI message dicts.

    Each history item is expected to expose ``.role`` and ``.content``;
    ``max_turns * 2`` keeps both sides of each user/assistant exchange.
    """
    return [
        {"role": msg.role, "content": msg.content}
        for msg in history[-max_turns * 2:]
    ]


def call_chat_model_openai(history, user_message: str):
    """Stream an OpenAI chat completion for *user_message*, yielding text deltas.

    Builds the message list as system prompt + recent history + new user
    message, then yields each non-empty content delta as it arrives.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages += build_chat_history(history or [])
    messages.append({"role": "user", "content": user_message})

    response = openai_client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages,
        temperature=0.7,
        stream=True,
    )
    for chunk in response:
        # Some stream chunks (e.g. the final usage chunk) carry no choices;
        # skip them instead of raising IndexError.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta:
            yield delta