File size: 3,731 Bytes
b68e1c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1aaa487
b68e1c5
1aaa487
b68e1c5
1aaa487
 
 
 
 
 
 
7da9455
1aaa487
 
 
 
 
7da9455
a036209
7da9455
 
 
 
 
1aaa487
 
 
 
 
 
b68e1c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import time

import torch
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from app.config import EMBEDDING_MODEL, OPENAI_MODEL, OPENAI_API_KEY

# === LLM Config ===
# Lazily-initialized SentenceTransformer singleton; populated by load_sentence_model()
# on first use so importing this module stays cheap (no model download / GPU init).
_sentence_model = None  # Not loaded at import time

# === Clients ===
# Module-level OpenAI client, shared by all callers; API key comes from app config.
openai_client = OpenAI(api_key=OPENAI_API_KEY)

# === System Prompt ===
# Film-curator persona plus a strict markdown output contract for the chat model.
# NOTE(review): the <!-- END_INTRO --> and <!-- END_MOVIE --> tokens look like
# stream-splitting markers for a downstream consumer — confirm before editing
# any of the prompt text, since the string content is runtime behavior.
SYSTEM_PROMPT = """
You are a professional film curator and critic. Your role is to analyze the user's preferences and recommend high-quality films or TV shows using only the provided list.

Focus on:

- Artistic merit and storytelling
- Genres, themes, tone, and emotional resonance
- IMDB and Rotten Tomatoes ratings
- Strong character-driven or thematically rich selections

### Response Format (in markdown):

1. Start with a concise 2 sentences **opening paragraph** that contextualizes the theme and the overall viewing experience the user is seeking. At the end of this paragraph, insert the token: <!-- END_INTRO -->.

2. Then, for each recommendation, use the following format (repeat for each title). At the end of each movie recommendation block, insert the token: <!-- END_MOVIE -->:

```
### <Number>. <Movie Title>
- POSTER_PATH: /abc123.jpg
- BACKDROP_PATH: /abc123.jpg
- GENRES: Genre1, Genre2, ...
- IMDB_RATING: X.X
- ROTTEN_TOMATOES_RATING: XX%
- TRAILER_KEY: abc123
- WHY_YOU_MIGHT_ENJOY_IT: <Short paragraph explaining the appeal based on character, themes, tone, and relevance to the user's intent.>
<!-- END_MOVIE -->
```

3. End with a brief **closing paragraph** that summarizes the emotional or intellectual throughline across the recommendations, and affirms their alignment with the user's preferences.

Write in **Markdown** only. Be concise, authoritative, and avoid overly generic statements. Each "Why You Might Enjoy It" should be specific and grounded in the movie’s themes, storytelling, or cultural relevance.
"""


def load_sentence_model():
    """Return the shared SentenceTransformer, loading and warming it on first call.

    Uses the module-level ``_sentence_model`` as a lazy singleton: the first
    caller pays the load + warmup cost; subsequent calls return the cached model.
    Picks CUDA when available, otherwise CPU.
    """
    global _sentence_model

    # Fast path: model already loaded.
    if _sentence_model is not None:
        return _sentence_model

    print("⏳ Loading embedding model...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    _sentence_model = SentenceTransformer(EMBEDDING_MODEL, device=device)

    print(f"🔥 Model '{EMBEDDING_MODEL}' loaded. Performing GPU warmup...")

    # Realistic multi-sentence warmup to trigger full CUDA graph
    warmup_sentences = [
        "A suspenseful thriller with deep character development and moral ambiguity.",
        "Coming-of-age story with emotional storytelling and strong ensemble performances.",
        "Mind-bending sci-fi with philosophical undertones and high concept ideas.",
        "Recommend me some comedies.",
    ]
    # Two encode passes with a short pause between them, matching the original
    # warmup sequence.
    _sentence_model.encode(warmup_sentences, show_progress_bar=False)
    time.sleep(0.5)
    _sentence_model.encode(warmup_sentences, show_progress_bar=False)
    print("🚀 Embedding model fully warmed up.")

    return _sentence_model


def embed_text(text: str) -> list[float]:
    """Embed *text* with the shared sentence model and return a plain float list."""
    embedding = load_sentence_model().encode(text)
    return embedding.tolist()


def build_chat_history(history: list, max_turns: int = 5) -> list:
    """Convert the most recent conversation turns into OpenAI message dicts.

    Args:
        history: Message objects exposing ``.role`` and ``.content`` attributes.
        max_turns: Number of user/assistant exchanges to keep; each turn is two
            messages, so the last ``max_turns * 2`` entries are retained.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts, oldest first.

    Fix: with ``max_turns=0`` the original slice ``history[-0:]`` degenerated to
    ``history[0:]`` and returned the ENTIRE history instead of none; guard
    against non-positive ``max_turns`` explicitly.
    """
    if max_turns <= 0:
        return []
    return [
        {"role": msg.role, "content": msg.content}
        for msg in history[-max_turns * 2:]
    ]



def call_chat_model_openai(history, user_message: str):
    """Stream assistant text for *user_message* given prior *history*.

    Builds a message list of system prompt + recent history + the new user
    message, opens a streaming chat completion, and yields each non-empty
    content delta as it arrives.
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        *build_chat_history(history or []),
        {"role": "user", "content": user_message},
    ]

    stream = openai_client.chat.completions.create(
        model=OPENAI_MODEL, messages=messages, temperature=0.7, stream=True
    )

    for event in stream:
        piece = event.choices[0].delta.content
        if piece:
            yield piece