import re
import time
from concurrent.futures import ThreadPoolExecutor

from app.llm_services import call_chat_model_openai


def sanitize_markdown(md_text: str) -> str:
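    """Strip Markdown image embeds (``![alt](url)``) from a text chunk.

    Note: this is applied per streamed chunk, so an image tag split across
    chunk boundaries can slip through.
    """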
    return re.sub(r'!\[.*?\]\(.*?\)', '', md_text)


def build_chat_fn(retriever, intent_classifier):
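    """Return a streaming `chat` generator closed over `retriever` and
    `intent_classifier`."""
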
    def chat(
        question,
        history,
        media_type="movies",
        genres=None,
        providers=None,
        year_range=None,
    ):
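        """Answer `question`, streaming response chunks from the chat model.

        The first yielded chunk is a mode marker ("[[MODE:recommendation]]"
        or "[[MODE:chat]]"); the rest are model output.
        """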
        full_t0 = time.time()
        
        # Intent classification and dense embedding are independent, so run
        # them concurrently on the executor's worker threads.
        with ThreadPoolExecutor() as executor:
            # Classify user intent: is this a recommendation request?
            t0 = time.time()
            intent_future = executor.submit(
                lambda q: intent_classifier(q)[0]["label"] == "recommendation", question
            )
            print(f"\n🧠 executor.submit(classify_intent) took {time.time() - t0:.3f}s")

            # Embed user query as dense vector asynchronously
            t0 = time.time()
            query_vector_future = executor.submit(retriever.embed_dense, question)
            print(f"🧡 executor.submit(embed_text) took {time.time() - t0:.3f}s")

            # Wait for results
            t0 = time.time()
            is_rec_intent = intent_future.result()
            print(f"βœ… classify_intent() result received in {time.time() - t0:.3f}s")

            t0 = time.time()
            dense_vector = query_vector_future.result()
            print(f"πŸ“ˆ embed_text() result received in {time.time() - t0:.3f}s")

        # Embed user query as sparse vector for hybrid retrieval
        t0 = time.time()
        sparse_vector = retriever.embed_sparse(question, media_type)
        print(f"πŸ“ˆ embed_sparse() result received in {time.time() - t0:.3f}s")
        
        if is_rec_intent:
            yield "[[MODE:recommendation]]\n"

            t0 = time.time()
            retrieved_movies = retriever.retrieve_and_rerank(
                dense_vector,
                sparse_vector,
                media_type.lower(),
                genres,
                providers,
                year_range,
            )
            print(f"\nπŸ“š retrieve_and_rerank() took {time.time() - t0:.3f}s")

            context = retriever.format_context(retrieved_movies)
            user_message = f"{question}\n\nContext:\nBased on the following retrieved {media_type.lower()}, suggest the best recommendations.\n\n{context}"

            print(f"✨ Total chat() prep time before streaming: {time.time() - full_t0:.3f}s")
            for chunk in call_chat_model_openai(history, user_message):
                yield chunk

        else:
            yield "[[MODE:chat]]\n"

            user_message = f"The user did not ask for a recommendation. Ask them to be more specific. Answer this as a general question: {question}" 

            print(f"✨ Total chat() prep time before streaming: {time.time() - full_t0:.3f}s")
            for chunk in call_chat_model_openai(history, user_message):
                yield sanitize_markdown(chunk)

    return chat
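

# Usage sketch: a minimal wiring example, not this app's actual entry point.
# `MovieRetriever` and the import path below are hypothetical stand-ins; the
# classifier just has to satisfy `intent_classifier(q)[0]["label"]`, which a
# Hugging Face text-classification pipeline does.
#
#     from transformers import pipeline
#     from app.retrieval import MovieRetriever  # hypothetical import path
#
#     chat = build_chat_fn(
#         retriever=MovieRetriever(),
#         intent_classifier=pipeline("text-classification", model="<intent-model>"),
#     )
#     for chunk in chat("Movies like Blade Runner?", history=[]):
#         print(chunk, end="")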