Tim Luka Horstmann
committed on
Commit
·
95c3613
1
Parent(s):
392cd96
No RAG
Browse files
app.py
CHANGED
@@ -96,25 +96,28 @@ def retrieve_context(query, top_k=2):
|
|
96 |
except Exception as e:
|
97 |
logger.error(f"Error in retrieve_context: {str(e)}")
|
98 |
raise
|
|
|
|
|
|
|
99 |
|
100 |
def stream_response(query):
|
101 |
logger.info(f"Processing query: {query}")
|
102 |
start_time = time.time()
|
103 |
first_token_logged = False
|
104 |
|
105 |
-
# FAQ check first
|
106 |
-
query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
|
107 |
-
query_embedding = query_embedding.reshape(1, -1)
|
108 |
-
faiss.normalize_L2(query_embedding)
|
109 |
-
similarities = np.dot(faq_embeddings, query_embedding.T).flatten()
|
110 |
-
max_sim = np.max(similarities)
|
111 |
-
if max_sim > 0.9:
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
messages = [
|
119 |
{
|
120 |
"role": "system",
|
@@ -122,8 +125,8 @@ def stream_response(query):
|
|
122 |
"You are Tim Luka Horstmann, a Computer Scientist. A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
|
123 |
"For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
|
124 |
"For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France, or your field is AI) "
|
125 |
-
"and say 'I don
|
126 |
-
f"CV: {
|
127 |
)
|
128 |
},
|
129 |
{"role": "user", "content": query}
|
@@ -144,8 +147,9 @@ def stream_response(query):
|
|
144 |
if not first_token_logged and time.time() - start_time > 0:
|
145 |
logger.info(f"First token time: {time.time() - start_time:.2f}s")
|
146 |
first_token_logged = True
|
147 |
-
|
148 |
-
|
|
|
149 |
yield f"data: {buffer}\n\n"
|
150 |
buffer = ""
|
151 |
if buffer: # Flush remaining buffer
|
@@ -178,10 +182,13 @@ async def model_info():
|
|
178 |
"faiss_index_dim": cv_embeddings.shape[1],
|
179 |
}
|
180 |
|
|
|
|
|
|
|
181 |
@app.on_event("startup")
|
182 |
async def warm_up_model():
|
183 |
logger.info("Warming up the model...")
|
184 |
-
dummy_query = "
|
185 |
-
|
186 |
-
|
187 |
-
logger.info("Model warm-up
|
|
|
96 |
except Exception as e:
|
97 |
logger.error(f"Error in retrieve_context: {str(e)}")
|
98 |
raise
|
99 |
+
# Load the full CV at startup
|
100 |
+
with open("../assets/documents/cv_text.txt", "r", encoding="utf-8") as f:
|
101 |
+
full_cv_text = f.read()
|
102 |
|
103 |
def stream_response(query):
|
104 |
logger.info(f"Processing query: {query}")
|
105 |
start_time = time.time()
|
106 |
first_token_logged = False
|
107 |
|
108 |
+
# FAQ check first (keep this as it's fast)
|
109 |
+
# query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
|
110 |
+
# query_embedding = query_embedding.reshape(1, -1)
|
111 |
+
# faiss.normalize_L2(query_embedding)
|
112 |
+
# similarities = np.dot(faq_embeddings, query_embedding.T).flatten()
|
113 |
+
# max_sim = np.max(similarities)
|
114 |
+
# if max_sim > 0.9:
|
115 |
+
# idx = np.argmax(similarities)
|
116 |
+
# yield f"data: {faqs[idx]['answer']}\n\n"
|
117 |
+
# yield "data: [DONE]\n\n"
|
118 |
+
# return
|
119 |
+
|
120 |
+
# Use full CV instead of retrieved chunks
|
121 |
messages = [
|
122 |
{
|
123 |
"role": "system",
|
|
|
125 |
"You are Tim Luka Horstmann, a Computer Scientist. A user is asking you a question. Respond as yourself, using the first person, in a friendly and concise manner. "
|
126 |
"For questions about your CV, base your answer *exclusively* on the provided CV information below and do not add any details not explicitly stated. "
|
127 |
"For casual questions not covered by the CV, respond naturally but limit answers to general truths about yourself (e.g., your current location is Paris, France, or your field is AI) "
|
128 |
+
"and say 'I don't have specific details to share about that' if pressed for specifics beyond the CV or FAQs. Do not invent facts, experiences, or opinions not supported by the CV or FAQs. "
|
129 |
+
f"CV: {full_cv_text}"
|
130 |
)
|
131 |
},
|
132 |
{"role": "user", "content": query}
|
|
|
147 |
if not first_token_logged and time.time() - start_time > 0:
|
148 |
logger.info(f"First token time: {time.time() - start_time:.2f}s")
|
149 |
first_token_logged = True
|
150 |
+
|
151 |
+
# More natural chunking - yield complete sentences when possible
|
152 |
+
if any(buffer.endswith(char) for char in [".", "!", "?"]) or len(buffer) > 30:
|
153 |
yield f"data: {buffer}\n\n"
|
154 |
buffer = ""
|
155 |
if buffer: # Flush remaining buffer
|
|
|
182 |
"faiss_index_dim": cv_embeddings.shape[1],
|
183 |
}
|
184 |
|
185 |
+
# Optimize the model loading process
|
186 |
+
|
187 |
+
# Use a smaller warm-up query
|
188 |
@app.on_event("startup")
|
189 |
async def warm_up_model():
|
190 |
logger.info("Warming up the model...")
|
191 |
+
dummy_query = "Hello" # Shorter query
|
192 |
+
# Just execute once to prime the model without waiting for completion
|
193 |
+
next(stream_response(dummy_query))
|
194 |
+
logger.info("Model warm-up initiated.")
|