Tim Luka Horstmann
committed on
Commit
·
95c3613
1
Parent(s):
392cd96
No RAG
Browse files
app.py
CHANGED
@@ -96,25 +96,28 @@ def retrieve_context(query, top_k=2):
|
|
96 |
except Exception as e:
|
97 |
logger.error(f"Error in retrieve_context: {str(e)}")
|
98 |
raise
|
|
|
|
|
|
|
99 |
|
100 |
def stream_response(query):
|
101 |
logger.info(f"Processing query: {query}")
|
102 |
start_time = time.time()
|
103 |
first_token_logged = False
|
104 |
|
105 |
-
# FAQ check first
|
106 |
-
query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
|
107 |
-
query_embedding = query_embedding.reshape(1, -1)
|
108 |
-
faiss.normalize_L2(query_embedding)
|
109 |
-
similarities = np.dot(faq_embeddings, query_embedding.T).flatten()
|
110 |
-
max_sim = np.max(similarities)
|
111 |
-
if max_sim > 0.9:
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
messages = [
|
119 |
{
|
120 |
"role": "system",
|
@@ -122,8 +125,8 @@ def stream_response(query):
|
|
122 |
"You are Tim Luka Horstmann, a Computer Scientist. A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
|
123 |
"For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
|
124 |
"For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France, or your field is AI) "
|
125 |
-
"and say 'I don
|
126 |
-
f"CV: {
|
127 |
)
|
128 |
},
|
129 |
{"role": "user", "content": query}
|
@@ -144,8 +147,9 @@ def stream_response(query):
|
|
144 |
if not first_token_logged and time.time() - start_time > 0:
|
145 |
logger.info(f"First token time: {time.time() - start_time:.2f}s")
|
146 |
first_token_logged = True
|
147 |
-
|
148 |
-
|
|
|
149 |
yield f"data: {buffer}\n\n"
|
150 |
buffer = ""
|
151 |
if buffer: # Flush remaining buffer
|
@@ -178,10 +182,13 @@ async def model_info():
|
|
178 |
"faiss_index_dim": cv_embeddings.shape[1],
|
179 |
}
|
180 |
|
|
|
|
|
|
|
181 |
@app.on_event("startup")
|
182 |
async def warm_up_model():
|
183 |
logger.info("Warming up the model...")
|
184 |
-
dummy_query = "
|
185 |
-
|
186 |
-
|
187 |
-
logger.info("Model warm-up
|
|
|
96 |
except Exception as e:
|
97 |
logger.error(f"Error in retrieve_context: {str(e)}")
|
98 |
raise
|
99 |
+
# Load the full CV at startup
|
100 |
+
with open("../assets/documents/cv_text.txt", "r", encoding="utf-8") as f:
|
101 |
+
full_cv_text = f.read()
|
102 |
|
103 |
def stream_response(query):
|
104 |
logger.info(f"Processing query: {query}")
|
105 |
start_time = time.time()
|
106 |
first_token_logged = False
|
107 |
|
108 |
+
# FAQ check first (keep this as it's fast)
|
109 |
+
# query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
|
110 |
+
# query_embedding = query_embedding.reshape(1, -1)
|
111 |
+
# faiss.normalize_L2(query_embedding)
|
112 |
+
# similarities = np.dot(faq_embeddings, query_embedding.T).flatten()
|
113 |
+
# max_sim = np.max(similarities)
|
114 |
+
# if max_sim > 0.9:
|
115 |
+
# idx = np.argmax(similarities)
|
116 |
+
# yield f"data: {faqs[idx]['answer']}\n\n"
|
117 |
+
# yield "data: [DONE]\n\n"
|
118 |
+
# return
|
119 |
+
|
120 |
+
# Use full CV instead of retrieved chunks
|
121 |
messages = [
|
122 |
{
|
123 |
"role": "system",
|
|
|
125 |
"You are Tim Luka Horstmann, a Computer Scientist. A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
|
126 |
"For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
|
127 |
"For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France, or your field is AI) "
|
128 |
+
"and say 'I don't have specific details to share about that' if pressed for specifics beyond the CV or FAQs. Do not invent facts, experiences, or opinions not supported by the CV or FAQs. "
|
129 |
+
f"CV: {full_cv_text}"
|
130 |
)
|
131 |
},
|
132 |
{"role": "user", "content": query}
|
|
|
147 |
if not first_token_logged and time.time() - start_time > 0:
|
148 |
logger.info(f"First token time: {time.time() - start_time:.2f}s")
|
149 |
first_token_logged = True
|
150 |
+
|
151 |
+
# More natural chunking - yield complete sentences when possible
|
152 |
+
if any(buffer.endswith(char) for char in [".", "!", "?"]) or len(buffer) > 30:
|
153 |
yield f"data: {buffer}\n\n"
|
154 |
buffer = ""
|
155 |
if buffer: # Flush remaining buffer
|
|
|
182 |
"faiss_index_dim": cv_embeddings.shape[1],
|
183 |
}
|
184 |
|
185 |
+
# Optimize the model loading process
|
186 |
+
|
187 |
+
# Use a smaller warm-up query
|
188 |
@app.on_event("startup")
|
189 |
async def warm_up_model():
|
190 |
logger.info("Warming up the model...")
|
191 |
+
dummy_query = "Hello" # Shorter query
|
192 |
+
# Just execute once to prime the model without waiting for completion
|
193 |
+
next(stream_response(dummy_query))
|
194 |
+
logger.info("Model warm-up initiated.")
|