JJ Tsao committed
Commit 1005046 · 1 Parent(s): b68e1c5

API update
README.md CHANGED
@@ -9,5 +9,3 @@ pinned: false
 license: mit
 short_description: API service for rage movie and tv show recommendation app
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app/__pycache__/__init__.cpython-313.pyc DELETED
Binary file (190 Bytes)
 
app/__pycache__/api_routes.cpython-313.pyc DELETED
Binary file (1.27 kB)
 
app/__pycache__/bootstrap.cpython-313.pyc DELETED
Binary file (3.5 kB)
 
app/__pycache__/chatbot.cpython-313.pyc DELETED
Binary file (4.04 kB)
 
app/__pycache__/config.cpython-313.pyc DELETED
Binary file (1.51 kB)
 
app/__pycache__/llm_services.cpython-313.pyc DELETED
Binary file (3.75 kB)
 
app/__pycache__/media_retriever.cpython-313.pyc DELETED
Binary file (11.8 kB)
 
app/__pycache__/retriever.cpython-313.pyc DELETED
Binary file (527 Bytes)
 
app/__pycache__/schemas.cpython-313.pyc DELETED
Binary file (2.16 kB)
 
app/__pycache__/vectorstore.cpython-313.pyc DELETED
Binary file (741 Bytes)
 
app/api/.DS_Store ADDED
Binary file (6.15 kB)
 
app/{api_routes.py → api/api_routes.py} RENAMED
@@ -1,6 +1,7 @@
-from app.bootstrap import chat_fn
-from app.schemas import ChatRequest
-from fastapi import APIRouter
+from app.core.bootstrap import chat_fn
+from app.api.schemas import ChatRequest, FinalRecsRequest
+from app.services.usage_logger import log_final_results
+from fastapi import APIRouter, HTTPException
 from fastapi.responses import StreamingResponse

 router = APIRouter()
@@ -16,8 +17,32 @@ async def chat_endpoint(req: ChatRequest):
             genres=req.genres,
             providers=req.providers,
             year_range=tuple(req.year_range),
+            session_id=req.session_id,
+            query_id=req.query_id,
+            device_info=req.device_info,
         )
         for chunk in generator:
             yield chunk

     return StreamingResponse(response_stream(), media_type="text/plain")
+
+
+@router.post("/log/final_recs")
+async def log_final_recommendations(req: FinalRecsRequest):
+    rows = [
+        {
+            "query_id": req.query_id,
+            "media_id": rec.media_id,
+            "is_final_rec": True,
+            "why_summary": rec.why
+        }
+        for rec in req.final_recs
+    ]
+
+    try:
+        log_final_results(rows)
+        return {"status": "ok"}
+    except Exception as e:
+        print(f"❌ Error logging final recs: {e}")
+        raise HTTPException(status_code=500, detail="Failed to log final recommendations")
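For reference, a client-side sketch of calling the new logging endpoint (base URL and ids are placeholders):

```python
import httpx

payload = {
    "query_id": "q-123",  # placeholder id
    "final_recs": [
        {"media_id": 27205, "why": "Mind-bending heist structure with emotional stakes."},
    ],
}
resp = httpx.post("http://localhost:8000/log/final_recs", json=payload)  # host is a placeholder
resp.raise_for_status()
print(resp.json())  # {"status": "ok"}
```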
app/{schemas.py → api/schemas.py} RENAMED
@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import List
+from typing import List, Optional

 from pydantic import BaseModel, field_validator, model_validator

@@ -13,6 +13,11 @@ class MediaType(str, Enum):
     MOVIE = "movies"
     TV = "tvs"

+class DeviceInfo(BaseModel):
+    device_type: Optional[str] = None
+    platform: Optional[str] = None
+    user_agent: Optional[str] = None
+

 class ChatRequest(BaseModel):
     question: str
@@ -21,6 +26,9 @@ class ChatRequest(BaseModel):
     genres: List[str] = []
     providers: List[str] = []
     year_range: List[int] = [1920, 2025]
+    session_id: str
+    query_id: str
+    device_info: Optional[DeviceInfo] = None

     @field_validator("question")
     def validate_question(cls, v):
@@ -33,3 +41,13 @@ class ChatRequest(BaseModel):
         if len(self.year_range) != 2:
             raise ValueError("year_range must be a list of exactly two integers: [start, end]")
         return self
+
+
+class FinalRec(BaseModel):
+    media_id: int
+    why: str
+
+
+class FinalRecsRequest(BaseModel):
+    query_id: str
+    final_recs: List[FinalRec]
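A sketch of a request body the updated ChatRequest now expects — session_id and query_id are required, device_info stays optional (the media_type field is assumed from the MediaType enum above; other values are placeholders):

```python
from app.api.schemas import ChatRequest, DeviceInfo

req = ChatRequest(
    question="A slow-burn sci-fi with strong characters",
    media_type="movies",  # field assumed from the MediaType enum above
    genres=["Science Fiction"],
    providers=["Netflix"],
    year_range=[1990, 2025],
    session_id="sess-abc",  # now required
    query_id="q-123",       # now required
    device_info=DeviceInfo(device_type="desktop", platform="macOS"),
)
```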
app/core/.DS_Store ADDED
Binary file (6.15 kB)
 
app/core/bootstrap.py ADDED
@@ -0,0 +1,46 @@
+import os
+import time
+
+import nltk
+from app.services.chatbot import build_chat_fn
+from app.core.config import (
+    NLTK_PATH,
+    QDRANT_API_KEY,
+    QDRANT_ENDPOINT,
+    QDRANT_MOVIE_COLLECTION_NAME,
+    QDRANT_TV_COLLECTION_NAME,
+)
+from app.llm.custom_models import load_sentence_model, load_bm25_files, setup_intent_classifier
+from app.retrieval.retriever import get_media_retriever
+from app.retrieval.vectorstore import connect_qdrant
+
+start = time.time()
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+# nltk.data.path.append(str(NLTK_PATH))
+
+
+def setup_retriever():
+    embed_model = load_sentence_model()
+    bm25_models, bm25_vocabs = load_bm25_files()
+    nltk.data.path.append(str(NLTK_PATH))
+    print("✅ NLTK resources loaded")
+
+    qdrant_client = connect_qdrant(endpoint=QDRANT_ENDPOINT, api_key=QDRANT_API_KEY)
+
+    return get_media_retriever(
+        embed_model=embed_model,
+        qdrant_client=qdrant_client,
+        bm25_models=bm25_models,
+        bm25_vocabs=bm25_vocabs,
+        movie_collection_name=QDRANT_MOVIE_COLLECTION_NAME,
+        tv_collection_name=QDRANT_TV_COLLECTION_NAME,
+    )
+
+
+# Initialize once at startup
+retriever = setup_retriever()
+intent_classifier = setup_intent_classifier()
+chat_fn = build_chat_fn(retriever, intent_classifier)
+
+print(f"🔧 Total startup time: {time.time() - start:.2f}s")
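A minimal consumer sketch of the module-level chat_fn — keyword names are inferred from the route handler and chatbot signature in this commit, so treat them as illustrative; the first import runs the full (slow) startup path and assumes Qdrant credentials and model downloads are available:

```python
from app.api.schemas import DeviceInfo
from app.core.bootstrap import chat_fn

stream = chat_fn(
    question="Something mind-bending like Inception",
    history=[],
    media_type="Movies",
    genres=[],
    providers=[],
    year_range=[1990, 2025],
    session_id="local-test",
    query_id="local-test-1",
    device_info=DeviceInfo(),  # chatbot reads .device_type/.platform/.user_agent
)
for chunk in stream:
    print(chunk, end="")
```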
app/{config.py → core/config.py} RENAMED
@@ -9,18 +9,27 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")

 QDRANT_ENDPOINT = os.getenv("QDRANT_ENDPOINT")
-QDRANT_MOVIE_COLLECTION_NAME = os.getenv("QDRANT_MOVIE_COLLECTION_NAME_BGE")
-QDRANT_TV_COLLECTION_NAME = os.getenv("QDRANT_TV_COLLECTION_NAME_BGE")
+# QDRANT_MOVIE_COLLECTION_NAME = os.getenv("QDRANT_MOVIE_COLLECTION_NAME_BGE")
+# QDRANT_TV_COLLECTION_NAME = os.getenv("QDRANT_TV_COLLECTION_NAME_BGE")
+QDRANT_MOVIE_COLLECTION_NAME = "Movies_BGE_June"
+QDRANT_TV_COLLECTION_NAME = "TV_Shows_BGE_June"

-NLTK_PATH = Path(__file__).resolve().parent.parent / "data" / "nltk_data"
-BM25_PATH = Path(__file__).resolve().parent.parent / "data" / "bm25_files"
+SUPABASE_URL = os.getenv("SUPABASE_URL")
+SUPABASE_API_KEY = os.getenv("SUPABASE_API_KEY")

-INTENT_MODEL = "JJTsao/intent-classifier-distilbert-moviebot"  # Fine-tuned intent classification model for query intent classification
-EMBEDDING_MODEL = "JJTsao/fine-tuned_movie_retriever-bge-base-en-v1.5"  # Fine-tuned sentence transformer model for query dense vector embedding
+NLTK_PATH = Path(__file__).resolve().parent.parent.parent / "data" / "nltk_data"
+BM25_PATH = Path(__file__).resolve().parent.parent.parent / "data" / "bm25_files"
+
+INTENT_MODEL = "JJTsao/intent-classifier-distilbert-moviebot"  # Fine-tuned intent classification model for query intent classification
+EMBEDDING_MODEL = "JJTsao/fine-tuned_movie_retriever-bge-base-en-v1.5"  # Fine-tuned sentence transformer model for query dense vector embedding
 OPENAI_MODEL = "gpt-4o-mini"  # LLM for chat completions


 if not OPENAI_API_KEY or not QDRANT_API_KEY:
     raise ValueError("Missing API key(s).")
-if not QDRANT_ENDPOINT or not QDRANT_MOVIE_COLLECTION_NAME or not QDRANT_TV_COLLECTION_NAME:
+if (
+    not QDRANT_ENDPOINT
+    or not QDRANT_MOVIE_COLLECTION_NAME
+    or not QDRANT_TV_COLLECTION_NAME
+):
     raise ValueError("Missing QDrant URL or collection name.")
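A sketch of the environment this module now assumes (values are placeholders; the collection names themselves are hard-coded above):

```python
import os

# Required at import time (config raises ValueError otherwise).
os.environ["OPENAI_API_KEY"] = "sk-..."
os.environ["QDRANT_API_KEY"] = "..."
os.environ["QDRANT_ENDPOINT"] = "https://your-cluster.qdrant.io"

# New in this commit; optional — usage logging is skipped when absent.
os.environ["SUPABASE_URL"] = "https://your-project.supabase.co"
os.environ["SUPABASE_API_KEY"] = "..."
```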
app/llm/.DS_Store ADDED
Binary file (6.15 kB)
 
app/{bootstrap.py → llm/custom_models.py} RENAMED
@@ -1,69 +1,47 @@
-import os
 import time
 from pathlib import Path

 import joblib
-import nltk
-from app.chatbot import build_chat_fn
-from app.config import (
-    BM25_PATH,
-    INTENT_MODEL,
-    NLTK_PATH,
-    QDRANT_API_KEY,
-    QDRANT_ENDPOINT,
-    QDRANT_MOVIE_COLLECTION_NAME,
-    QDRANT_TV_COLLECTION_NAME,
-)
-from app.llm_services import load_sentence_model
-from app.retriever import get_media_retriever
-from app.vectorstore import connect_qdrant
+import torch
+from app.core.config import BM25_PATH, EMBEDDING_MODEL, INTENT_MODEL
 from rank_bm25 import BM25Okapi
+from sentence_transformers import SentenceTransformer
 from transformers import pipeline

-start = time.time()
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
+# === Model Config ===
+_sentence_model = None  # Not loaded at import time


-def load_bm25_files() -> tuple[dict[str, BM25Okapi], dict[str, int]]:
-    bm25_dir = Path(BM25_PATH)
-    try:
-        bm25_models = {
-            "movie": joblib.load(bm25_dir / "movie_bm25_model.joblib"),
-            "tv": joblib.load(bm25_dir / "tv_bm25_model.joblib"),
-        }
-        bm25_vocabs = {
-            "movie": joblib.load(bm25_dir / "movie_bm25_vocab.joblib"),
-            "tv": joblib.load(bm25_dir / "tv_bm25_vocab.joblib"),
-        }
-    except FileNotFoundError as e:
-        raise FileNotFoundError(f"Missing BM25 files: {e}")
-    return bm25_models, bm25_vocabs
-
-
-def setup_retriever():
-    embed_model = load_sentence_model()
-    qdrant_client = connect_qdrant(endpoint=QDRANT_ENDPOINT, api_key=QDRANT_API_KEY)
-    nltk.data.path.append(str(NLTK_PATH))
-    print("✅ NLTK resources loaded")
-
-    bm25_models, bm25_vocabs = load_bm25_files()
-    print("✅ BM25 files loaded")
+def load_sentence_model():
+    global _sentence_model
+    if _sentence_model is None:
+        print("Loading embedding model...")
+        _sentence_model = SentenceTransformer(
+            EMBEDDING_MODEL, device="cuda" if torch.cuda.is_available() else "cpu"
+        )

-    return get_media_retriever(
-        embed_model=embed_model,
-        qdrant_client=qdrant_client,
-        bm25_models=bm25_models,
-        bm25_vocabs=bm25_vocabs,
-        movie_collection_name=QDRANT_MOVIE_COLLECTION_NAME,
-        tv_collection_name=QDRANT_TV_COLLECTION_NAME,
-    )
+        print(f"Model '{EMBEDDING_MODEL}' loaded. Performing GPU warmup...")

+        # Realistic multi-sentence warmup to trigger full CUDA graph
+        warmup_sentences = [
+            "A suspenseful thriller with deep character development and moral ambiguity.",
+            "Coming-of-age story with emotional storytelling and strong ensemble performances.",
+            "Mind-bending sci-fi with philosophical undertones and high concept ideas.",
+            "Recommend me some comedies.",
+        ]
+        _ = _sentence_model.encode(warmup_sentences, show_progress_bar=False)
+        time.sleep(0.5)
+        _ = _sentence_model.encode(warmup_sentences, show_progress_bar=False)
+        print("🚀 Embedding model fully warmed up.")
+
+    return _sentence_model


 def setup_intent_classifier():
-    print(f"🔧 Loading intent classifier from {INTENT_MODEL}")
+    print(f"Loading intent classifier from {INTENT_MODEL}")
     classifier = pipeline("text-classification", model=INTENT_MODEL)

-    print("🔥 Warming up intent classifier...")
+    print("Warming up intent classifier...")
     warmup_queries = [
         "Can you recommend a feel-good movie?",
         "Who directed The Godfather?",
@@ -76,10 +54,23 @@ def setup_intent_classifier():
     return classifier


-# Initialize once at startup
-retriever = setup_retriever()
-intent_classifier = setup_intent_classifier()
-chat_fn = build_chat_fn(retriever, intent_classifier)
+def load_bm25_files() -> tuple[dict[str, BM25Okapi], dict[str, int]]:
+    bm25_dir = Path(BM25_PATH)
+    try:
+        bm25_models = {
+            "movie": joblib.load(bm25_dir / "movie_bm25_model.joblib"),
+            "tv": joblib.load(bm25_dir / "tv_bm25_model.joblib"),
+        }
+        bm25_vocabs = {
+            "movie": joblib.load(bm25_dir / "movie_bm25_vocab.joblib"),
+            "tv": joblib.load(bm25_dir / "tv_bm25_vocab.joblib"),
+        }
+        print("✅ BM25 files loaded")
+    except FileNotFoundError as e:
+        raise FileNotFoundError(f"Missing BM25 files: {e}")
+    return bm25_models, bm25_vocabs

-print(f"🔧 Total startup time: {time.time() - start:.2f}s")
+
+def embed_text(text: str) -> list[float]:
+    model = load_sentence_model()
+    return model.encode(text).tolist()
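The embedding model is now a lazily initialized module-level singleton; a usage sketch:

```python
from app.llm.custom_models import embed_text, load_bm25_files

vec = embed_text("moody neo-noir with an unreliable narrator")  # first call loads and warms up the model
bm25_models, bm25_vocabs = load_bm25_files()  # expects data/bm25_files/*.joblib from this repo
```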
app/{llm_services.py → llm/llm_completion.py} RENAMED
@@ -1,57 +1,43 @@
-import time
-
-import torch
 from openai import OpenAI
-from sentence_transformers import SentenceTransformer
-from app.config import EMBEDDING_MODEL, OPENAI_MODEL, OPENAI_API_KEY
-
-# === LLM Config ===
-_sentence_model = None  # Not loaded at import time
+from app.core.config import OPENAI_MODEL, OPENAI_API_KEY

 # === Clients ===
 openai_client = OpenAI(api_key=OPENAI_API_KEY)

 # === System Prompt ===
 SYSTEM_PROMPT = """
-You are a professional film curator and critic. Your role is to analyze the user's preferences and recommend high-quality films or TV shows using the provided context. Do not seek film or tv show options outside of the list provided to you.
-Focus on:
+You are a professional film curator and critic. Your role is to analyze the user's preferences and recommend high-quality films or TV shows using only the provided list.

-- Artistic merit and storytelling
-- Genres, themes, and tone
-- Popularity, IMDB ratings, and Rotten Tomatoes ratings
+Focus on:

-Provide a brief explanation of why the user might enjoy each movie or tv series. Include IMDB rating, Rotten Tomatoes ratings, and a poster. Answer with authority and care. Respond in markdown.
-"""
+- Artistic merit and storytelling
+- Genres, themes, tone, and emotional resonance
+- IMDB and Rotten Tomatoes ratings
+- Strong character-driven or thematically rich selections

+### Response Format (in markdown):

-def load_sentence_model():
-    global _sentence_model
-    if _sentence_model is None:
-        print("⏳ Loading embedding model...")
-        _sentence_model = SentenceTransformer(
-            EMBEDDING_MODEL, device="cuda" if torch.cuda.is_available() else "cpu"
-        )
+1. Start with a concise 2 sentences **opening paragraph** that contextualizes the theme and the overall viewing experience the user is seeking. At the end of this paragraph, insert the token: <!-- END_INTRO -->.

-        print(f"🔥 Model '{EMBEDDING_MODEL}' loaded. Performing GPU warmup...")
+2. Then, for each recommendation, use the following format (repeat for each title). At the end of each movie recommendation block, insert the token: <!-- END_MOVIE -->:

-        # Realistic multi-sentence warmup to trigger full CUDA graph
-        warmup_sentences = [
-            "A suspenseful thriller with deep character development and moral ambiguity.",
-            "Coming-of-age story with emotional storytelling and strong ensemble performances.",
-            "Mind-bending sci-fi with philosophical undertones and high concept ideas.",
-            "Recommend me some comedies.",
-        ]
-        _ = _sentence_model.encode(warmup_sentences, show_progress_bar=False)
-        time.sleep(0.5)
-        _ = _sentence_model.encode(warmup_sentences, show_progress_bar=False)
-        print("🚀 Embedding model fully warmed up.")
+```
+### <Number>. <Movie Title>
+- GENRES: Genre1, Genre2, ...
+- IMDB_RATING: X.X
+- ROTTEN_TOMATOES_RATING: XX%
+- MEDIA_ID: 1234
+- POSTER_PATH: /abc123.jpg
+- BACKDROP_PATH: /abc123.jpg
+- TRAILER_KEY: abc123
+- WHY_YOU_MIGHT_ENJOY_IT: <Short paragraph explaining the appeal based on character, themes, tone, and relevance to the user's intent.>
+<!-- END_MOVIE -->
+```

-    return _sentence_model
+3. End with a brief **closing paragraph** that summarizes the emotional or intellectual throughline across the recommendations, and affirms their alignment with the user's preferences.

+Write in **Markdown** only. Be concise, authoritative, and avoid overly generic statements. Each "Why You Might Enjoy It" should be specific and grounded in the movie's themes, storytelling, or cultural relevance.
+"""

-def embed_text(text: str) -> list[float]:
-    model = load_sentence_model()
-    return model.encode(text).tolist()

 def build_chat_history(history: list, max_turns: int = 5) -> list:
@@ -61,7 +47,6 @@ def build_chat_history(history: list, max_turns: int = 5) -> list:
     ]


-
 def call_chat_model_openai(history, user_message: str):
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
     messages += build_chat_history(history or [])
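Given the sentinel tokens the new system prompt requests, a client could split a finished response like this (a sketch; the function name is hypothetical):

```python
def split_recommendations(markdown: str) -> tuple[str, list[str]]:
    """Split a completed response into the intro and one block per recommended title."""
    intro, _, rest = markdown.partition("<!-- END_INTRO -->")
    blocks = [b.strip() for b in rest.split("<!-- END_MOVIE -->") if b.strip()]
    return intro.strip(), blocks
```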
app/retrieval/.DS_Store ADDED
Binary file (6.15 kB)
 
app/{media_retriever.py → retrieval/media_retriever.py} RENAMED
@@ -1,5 +1,6 @@
 from collections import Counter
-from typing import Dict, List
+from typing import Dict, List, Tuple
+import threading

 from nltk.corpus import stopwords
 from nltk.stem import PorterStemmer
@@ -8,6 +9,8 @@ from qdrant_client import QdrantClient
 from qdrant_client.models import FieldCondition, Filter, MatchValue, Range, models
 from sentence_transformers import SentenceTransformer

+_stop_words_lock = threading.Lock()
+

 class MediaRetriever:
     def __init__(
@@ -45,7 +48,12 @@ class MediaRetriever:

     @staticmethod
     def tokenize_and_preprocess(text: str) -> List[str]:
-        stop_words = set(stopwords.words("english"))
+        with _stop_words_lock:
+            try:
+                stop_words = set(stopwords.words("english"))
+            except Exception as e:
+                print("⚠️ Failed to load NLTK stopwords:", e)
+                stop_words = set()
         stemmer = PorterStemmer()

         tokens = word_tokenize(text.lower())
@@ -118,7 +126,7 @@ class MediaRetriever:

         # Fuse dense and sparse results and rerank
         fused = self.fuse_dense_sparse(dense_results, sparse_results)
-        reranked = self.rerank_fused_results(fused)
+        reranked, scored_lookup = self.rerank_fused_results(fused)

         reranked_ids = [p.id for p in reranked[:20]]
         print("\nReranked Top-30:")
@@ -129,7 +137,7 @@ class MediaRetriever:
                 f"#{i + 1} {p.payload.get('title', '')} | Score: {p.score} Dense: {f['dense_score']:.3f}, Sparse: {f['sparse_score']:.3f}, Pop: {p.payload.get('popularity', 0)}, Rating: {p.payload.get('vote_average', 0)}"
             )

-        return reranked[: self.top_k]
+        return reranked[: self.top_k], scored_lookup

     def _build_filter(
         self, genres=None, providers=None, year_range=None
@@ -172,7 +180,7 @@ class MediaRetriever:
             using="dense_vector",
             query_filter=qdrant_filter,
             limit=self.semantic_retrieval_limit,
-            with_payload=["llm_context", "title", "popularity", "vote_average"],
+            with_payload=["llm_context", "media_id", "title", "popularity", "vote_average"],
             with_vectors=False,
         )

@@ -188,7 +196,7 @@ class MediaRetriever:
             using="sparse_vector",
             query_filter=qdrant_filter,
             limit=self.bm25_retrieval_limit,
-            with_payload=["llm_context", "title", "popularity", "vote_average"],
+            with_payload=["llm_context", "media_id", "title", "popularity", "vote_average"],
             with_vectors=False,
         )

@@ -227,29 +235,38 @@ class MediaRetriever:
     def rerank_fused_results(
         self,
         fused: Dict[str, Dict],
-    ) -> List:
+    ) -> Tuple[List, Dict]:
         max_popularity = max(
             (float(f["point"].payload.get("popularity", 0)) for f in fused.values()),
             default=1.0,
         )

-        def compute_score(f):
-            point = f["point"]
-            dense_score = f["dense_score"]
-            sparse_score = f["sparse_score"]
-            popularity = float(point.payload.get("popularity", 0)) / max_popularity
-            vote_average = float(point.payload.get("vote_average", 0)) / 10.0
+        scored = {}
+        for id_, f in fused.items():
+            point = f["point"]
+            dense_score = f["dense_score"]
+            sparse_score = f["sparse_score"]
+            popularity = float(point.payload.get("popularity", 0)) / max_popularity
+            vote_average = float(point.payload.get("vote_average", 0)) / 10.0

-            return (
+            reranked_score = (
                 self.dense_weight * dense_score
                 + self.sparse_weight * sparse_score
                 + self.rating_weight * vote_average
                 + self.popularity_weight * popularity
             )

-        reranked = sorted(fused.values(), key=compute_score, reverse=True)
+            scored[id_] = {
+                "point": point,
+                "dense_score": dense_score,
+                "sparse_score": sparse_score,
+                "reranked_score": reranked_score,
+            }
+
+        sorted_ids = sorted(scored.items(), key=lambda x: x[1]["reranked_score"], reverse=True)

-        return [f["point"] for f in reranked]
+        return [v["point"] for _, v in sorted_ids], scored

     def format_context(self, movies: list[dict]) -> str:
         # Format the retrieved documents as context for LLM
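retrieve_and_rerank() now returns a (points, scored_lookup) tuple so callers can log per-result scores; a consumer sketch (argument order beyond the first three is inferred from services/chatbot.py in this commit):

```python
def top_titles(retriever, dense_vector, sparse_vector, genres, providers, year_range):
    points, scored_lookup = retriever.retrieve_and_rerank(
        dense_vector, sparse_vector, "movies", genres, providers, year_range
    )
    # Pair each reranked point with the composite score that ordered it.
    return [
        (p.payload["title"], round(scored_lookup[p.id]["reranked_score"], 3))
        for p in points
    ]
```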
app/{retriever.py → retrieval/retriever.py} RENAMED
@@ -1,4 +1,4 @@
-from app.media_retriever import MediaRetriever
+from app.retrieval.media_retriever import MediaRetriever

 def get_media_retriever(
     embed_model,
app/{vectorstore.py → retrieval/vectorstore.py} RENAMED
File without changes
app/services/.DS_Store ADDED
Binary file (6.15 kB)
 
app/{chatbot.py → services/chatbot.py} RENAMED
@@ -2,11 +2,11 @@ import re
 import time
 from concurrent.futures import ThreadPoolExecutor

-from app.llm_services import call_chat_model_openai
-
+from app.llm.llm_completion import call_chat_model_openai
+from app.services.usage_logger import log_query_and_results

 def sanitize_markdown(md_text: str) -> str:
-    return re.sub(r'!\[.*?\]\(.*?\)', '', md_text)
+    return re.sub(r"!\[.*?\]\(.*?\)", "", md_text)


 def build_chat_fn(retriever, intent_classifier):
@@ -17,9 +17,12 @@ def build_chat_fn(retriever, intent_classifier):
         genres=None,
         providers=None,
         year_range=None,
+        session_id=None,
+        query_id=None,
+        device_info=None
     ):
         full_t0 = time.time()

         with ThreadPoolExecutor() as executor:
             # Classify user intent to determine if it is a recommendation ask
             t0 = time.time()
@@ -46,11 +49,10 @@ def build_chat_fn(retriever, intent_classifier):
             t0 = time.time()
             sparse_vector = retriever.embed_sparse(question, media_type)
             print(f"📈 embed_sparse() result received in {time.time() - t0:.3f}s")
-
+
             if is_rec_intent:
-                # If Yes, proceed with the RAG pipeline for retrieval and recommendation
-                t0 = time.time()
-                retrieved_movies = retriever.retrieve_and_rerank(
+                t0 = time.time()
+                retrieved_movies, scored_lookup = retriever.retrieve_and_rerank(
                     dense_vector,
                     sparse_vector,
                     media_type.lower(),
@@ -60,18 +62,73 @@ def build_chat_fn(retriever, intent_classifier):
                 )
                 print(f"\n📚 retrieve_and_rerank() took {time.time() - t0:.3f}s")

+                query_entry = {
+                    "query_id": query_id,
+                    "session_id": session_id,
+                    "question": question,
+                    "intent": "recommendation",
+                    "media_type": media_type,
+                    "genres": genres,
+                    "providers": providers,
+                    "year_start": year_range[0],
+                    "year_end": year_range[1],
+                    "device_type": device_info.device_type,
+                    "platform": device_info.platform,
+                    "user_agent": device_info.user_agent
+                }
+
+                result_entries = []
+                for rank, p in enumerate(retrieved_movies):
+                    s = scored_lookup[p.id]
+                    result_entries.append({
+                        "query_id": query_id,
+                        "media_type": media_type,
+                        "media_id": p.payload["media_id"],
+                        "title": p.payload["title"],
+                        "rank": rank + 1,
+                        "dense_score": s["dense_score"],
+                        "sparse_score": s["sparse_score"],
+                        "reranked_score": s["reranked_score"],
+                        "is_final_rec": False
+                    })
+
+                try:
+                    log_query_and_results(query_entry, result_entries)
+                except Exception as e:
+                    print("⚠️ Failed to log to Supabase:", e)
+
+                yield "[[MODE:recommendation]]\n"
+
                 context = retriever.format_context(retrieved_movies)
                 user_message = f"{question}\n\nContext:\nBased on the following retrieved {media_type.lower()}, suggest the best recommendations.\n\n{context}"
-
-                print(f"✨ Total chat() prep time before streaming: {time.time() - full_t0:.3f}s")
+
+                print(
+                    f"✨ Total chat() prep time before streaming: {time.time() - full_t0:.3f}s"
+                )
+
                 for chunk in call_chat_model_openai(history, user_message):
                     yield chunk

             else:
-                # If No, proceed with a general conversation
-                user_message = question
+                log_query_and_results(
+                    query_entry={
+                        "query_id": query_id,
+                        "session_id": session_id,
+                        "question": question,
+                        "intent": "chat",
+                        "media_type": media_type,
+                    },
+                    result_entries=[]
+                )

-                print(f"✨ Total chat() prep time before streaming: {time.time() - full_t0:.3f}s")
+                user_message = f"The user did not ask for a recommendation. Ask them to be more specific. Answer this as a general question: {question}"
+
+                print(
+                    f"✨ Total chat() prep time before streaming: {time.time() - full_t0:.3f}s"
+                )
+
+                yield "[[MODE:chat]]\n"
+
                 for chunk in call_chat_model_openai(history, user_message):
                     yield sanitize_markdown(chunk)
app/services/usage_logger.py ADDED
@@ -0,0 +1,76 @@
+from datetime import datetime
+
+import httpx
+from app.core.config import SUPABASE_API_KEY, SUPABASE_URL
+
+
+def log_query_and_results(
+    query_entry: dict,
+    result_entries: list[dict],
+):
+    if not SUPABASE_URL or not SUPABASE_API_KEY:
+        print("⚠️ Missing Supabase config, skipping log.")
+        return
+
+    timestamp = datetime.utcnow().isoformat()
+    query_entry.setdefault("created_at", timestamp)
+    for r in result_entries:
+        r.setdefault("created_at", timestamp)
+
+    headers = {
+        "apikey": SUPABASE_API_KEY,
+        "Authorization": f"Bearer {SUPABASE_API_KEY}",
+        "Content-Type": "application/json",
+        "Prefer": "resolution=merge-duplicates"
+    }
+
+    try:
+        # 1. Insert usage log
+        query_resp = httpx.post(
+            f"{SUPABASE_URL}/rest/v1/query_logs",
+            headers=headers,
+            json=[query_entry]
+        )
+
+        if query_resp.status_code not in (200, 201, 204):
+            print("⚠️ Failed to log usage:", query_resp.text)
+
+        # 2. Insert result logs in batch
+        if result_entries:
+            result_resp = httpx.post(
+                f"{SUPABASE_URL}/rest/v1/result_logs",
+                headers=headers,
+                json=result_entries
+            )
+            if result_resp.status_code not in (200, 201, 204):
+                print("⚠️ Failed to log results:", result_resp.text)
+
+    except Exception as e:
+        print("❌ Logging error:", e)
+
+
+def log_final_results(result_entries: list[dict]):
+    if not SUPABASE_URL or not SUPABASE_API_KEY:
+        print("⚠️ Missing Supabase config, skipping log.")
+        return
+
+    headers = {
+        "apikey": SUPABASE_API_KEY,
+        "Authorization": f"Bearer {SUPABASE_API_KEY}",
+        "Content-Type": "application/json",
+        "Prefer": "resolution=merge-duplicates"
+    }
+
+    try:
+        result_resp = httpx.post(
+            f"{SUPABASE_URL}/rest/v1/result_logs",
+            headers=headers,
+            json=result_entries
+        )
+        if result_resp.status_code not in (200, 201, 204):
+            print("⚠️ Failed to log final results:", result_resp.text)
+    except Exception as e:
+        print("❌ Error in write_final_results:", e)
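A direct usage sketch of the new logger (assumes SUPABASE_URL / SUPABASE_API_KEY are set; the row mirrors the result_logs columns written above, values are placeholders):

```python
from app.services.usage_logger import log_final_results

log_final_results([
    {
        "query_id": "q-123",
        "media_id": 27205,
        "is_final_rec": True,
        "why_summary": "Taut, character-driven heist with emotional stakes.",
    }
])
```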
data/bm25_files/movie_bm25_model.joblib CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8aaf32789542f07efa41b2f3a6023110e0f9df84f354ce50fc67173ed57eb9e0
-size 6617501
+oid sha256:829dfd9b3e50992a2617415e4d45e05eb8b887206e77ff5250416b7cd5dac0d2
+size 6637836

data/bm25_files/movie_bm25_vocab.joblib CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:559fe41c00d8e94d2e516f8278b5950f2e02b4938151cdc0e23986ae6936513b
-size 454110
+oid sha256:5d1304c2611aa1072156fe60aa0298934dc72a8f85c5f484ba49ff641223dadf
+size 688111

data/bm25_files/tv_bm25_model.joblib CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4bf700b0207c36e600f176b8151c195f2b3d6f9950dc70b10c922c06706224fc
-size 5680147
+oid sha256:7b7275086436201a11b2027fa28727d8d05cf6c3180bb68de2cd0deb12e5dc62
+size 5712382

data/bm25_files/tv_bm25_vocab.joblib CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eab388573cc6b4b7dc5b70cd13bdb7dcaaf0a7f261a9cf980c3251a33425d8a0
-size 489786
+oid sha256:db37c47df4a7f73e792c479d72095a62a35389be3ed01997aa60ab57d0e320a5
+size 497815
main.py CHANGED
@@ -1,4 +1,4 @@
-from app.api_routes import router
+from app.api.api_routes import router

 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
@@ -20,3 +20,7 @@ app.include_router(router)
 @app.get("/health")
 def health_check():
     return {"status": "ok"}
+
+@app.get("/")
+def read_root():
+    return {"status": "ok"}
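With the new root route in place, a quick smoke test (assumes a local run, e.g. `uvicorn main:app --port 8000`):

```python
import httpx

for path in ("/", "/health"):
    print(path, httpx.get(f"http://localhost:8000{path}").json())  # {"status": "ok"}
```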