Tim Luka Horstmann
committed on
Commit
·
59b5835
1
Parent(s):
bdbefdd
fixed streaming
Browse files
app.py
CHANGED
@@ -100,6 +100,7 @@ def retrieve_context(query, top_k=2):
|
|
100 |
def stream_response(query):
|
101 |
logger.info(f"Processing query: {query}")
|
102 |
start_time = time.time()
|
|
|
103 |
|
104 |
# FAQ check first
|
105 |
query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
|
@@ -132,16 +133,15 @@ def stream_response(query):
|
|
132 |
text = chunk['choices'][0]['delta'].get('content', '')
|
133 |
if text:
|
134 |
buffer += text
|
|
|
|
|
|
|
135 |
if buffer.endswith(" ") or buffer.endswith(".") or buffer.endswith("!"):
|
136 |
yield f"data: {buffer}\n\n"
|
137 |
buffer = ""
|
138 |
-
|
139 |
-
logger.info(f"First token time: {time.time() - start_time:.2f}s")
|
140 |
-
break
|
141 |
-
if buffer:
|
142 |
yield f"data: {buffer}\n\n"
|
143 |
yield "data: [DONE]\n\n"
|
144 |
-
|
145 |
class QueryRequest(BaseModel):
|
146 |
data: list
|
147 |
|
|
|
100 |
def stream_response(query):
|
101 |
logger.info(f"Processing query: {query}")
|
102 |
start_time = time.time()
|
103 |
+
first_token_logged = False # Flag to log first token time only once
|
104 |
|
105 |
# FAQ check first
|
106 |
query_embedding = embedder.encode(query, convert_to_numpy=True).astype("float32")
|
|
|
133 |
text = chunk['choices'][0]['delta'].get('content', '')
|
134 |
if text:
|
135 |
buffer += text
|
136 |
+
if not first_token_logged and time.time() - start_time > 0: # Log first token once
|
137 |
+
logger.info(f"First token time: {time.time() - start_time:.2f}s")
|
138 |
+
first_token_logged = True
|
139 |
if buffer.endswith(" ") or buffer.endswith(".") or buffer.endswith("!"):
|
140 |
yield f"data: {buffer}\n\n"
|
141 |
buffer = ""
|
142 |
+
if buffer: # Flush any remaining buffer
|
|
|
|
|
|
|
143 |
yield f"data: {buffer}\n\n"
|
144 |
yield "data: [DONE]\n\n"
|
|
|
145 |
class QueryRequest(BaseModel):
|
146 |
data: list
|
147 |
|