Tim Luka Horstmann committed on
Commit
95c3613
·
1 Parent(s): 392cd96
Files changed (1) hide show
  1. app.py +28 -21
app.py CHANGED
@@ -96,25 +96,28 @@ def retrieve_context(query, top_k=2):
96
  except Exception as e:
97
  logger.error(f"Error in retrieve_context: {str(e)}")
98
  raise
 
 
 
99
 
100
  def stream_response(query):
101
  logger.info(f"Processing query: {query}")
102
  start_time = time.time()
103
  first_token_logged = False
104
 
105
- # FAQ check first
106
- query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
107
- query_embedding = query_embedding.reshape(1, -1)
108
- faiss.normalize_L2(query_embedding)
109
- similarities = np.dot(faq_embeddings, query_embedding.T).flatten()
110
- max_sim = np.max(similarities)
111
- if max_sim > 0.9:
112
- idx = np.argmax(similarities)
113
- yield f"data: {faqs[idx]['answer']}\n\n"
114
- yield "data: [DONE]\n\n"
115
- return
116
-
117
- context = retrieve_context(query, top_k=2)
118
  messages = [
119
  {
120
  "role": "system",
@@ -122,8 +125,8 @@ def stream_response(query):
122
  "You are Tim Luka Horstmann, a Computer Scientist. A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
123
  "For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
124
  "For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France, or your field is AI) "
125
- "and say 'I dont have specific details to share about that' if pressed for specifics beyond the CV or FAQs. Do not invent facts, experiences, or opinions not supported by the CV or FAQs. "
126
- f"CV: {context}"
127
  )
128
  },
129
  {"role": "user", "content": query}
@@ -144,8 +147,9 @@ def stream_response(query):
144
  if not first_token_logged and time.time() - start_time > 0:
145
  logger.info(f"First token time: {time.time() - start_time:.2f}s")
146
  first_token_logged = True
147
- # Yield when buffer contains a word boundary (space, punctuation, or reasonable length)
148
- if any(buffer.endswith(char) for char in [" ", ".", ",", "!", "?"]) or len(buffer) > 20:
 
149
  yield f"data: {buffer}\n\n"
150
  buffer = ""
151
  if buffer: # Flush remaining buffer
@@ -178,10 +182,13 @@ async def model_info():
178
  "faiss_index_dim": cv_embeddings.shape[1],
179
  }
180
 
 
 
 
181
  @app.on_event("startup")
182
  async def warm_up_model():
183
  logger.info("Warming up the model...")
184
- dummy_query = "Hi"
185
- for _ in stream_response(dummy_query):
186
- pass
187
- logger.info("Model warm-up complete.")
 
96
  except Exception as e:
97
  logger.error(f"Error in retrieve_context: {str(e)}")
98
  raise
99
+ # Load the full CV at startup
100
+ with open("../assets/documents/cv_text.txt", "r", encoding="utf-8") as f:
101
+ full_cv_text = f.read()
102
 
103
  def stream_response(query):
104
  logger.info(f"Processing query: {query}")
105
  start_time = time.time()
106
  first_token_logged = False
107
 
108
+ # FAQ check first (keep this as it's fast)
109
+ # query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
110
+ # query_embedding = query_embedding.reshape(1, -1)
111
+ # faiss.normalize_L2(query_embedding)
112
+ # similarities = np.dot(faq_embeddings, query_embedding.T).flatten()
113
+ # max_sim = np.max(similarities)
114
+ # if max_sim > 0.9:
115
+ # idx = np.argmax(similarities)
116
+ # yield f"data: {faqs[idx]['answer']}\n\n"
117
+ # yield "data: [DONE]\n\n"
118
+ # return
119
+
120
+ # Use full CV instead of retrieved chunks
121
  messages = [
122
  {
123
  "role": "system",
 
125
  "You are Tim Luka Horstmann, a Computer Scientist. A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
126
  "For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
127
  "For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France, or your field is AI) "
128
+ "and say 'I don't have specific details to share about that' if pressed for specifics beyond the CV or FAQs. Do not invent facts, experiences, or opinions not supported by the CV or FAQs. "
129
+ f"CV: {full_cv_text}"
130
  )
131
  },
132
  {"role": "user", "content": query}
 
147
  if not first_token_logged and time.time() - start_time > 0:
148
  logger.info(f"First token time: {time.time() - start_time:.2f}s")
149
  first_token_logged = True
150
+
151
+ # More natural chunking - yield complete sentences when possible
152
+ if any(buffer.endswith(char) for char in [".", "!", "?"]) or len(buffer) > 30:
153
  yield f"data: {buffer}\n\n"
154
  buffer = ""
155
  if buffer: # Flush remaining buffer
 
182
  "faiss_index_dim": cv_embeddings.shape[1],
183
  }
184
 
185
+ # Optimize the model loading process
186
+
187
+ # Use a smaller warm-up query
188
  @app.on_event("startup")
189
  async def warm_up_model():
190
  logger.info("Warming up the model...")
191
+ dummy_query = "Hello" # Shorter query
192
+ # Just execute once to prime the model without waiting for completion
193
+ next(stream_response(dummy_query))
194
+ logger.info("Model warm-up initiated.")