Tim Luka Horstmann committed
Commit 3bbf0cd · 1 Parent(s): 6f6e59d
Fixed

Files changed:
- app.py +47 -22
- cv_text.txt +2 -3
app.py CHANGED

@@ -101,9 +101,17 @@ def retrieve_context(query, top_k=2):
         logger.error(f"Error in retrieve_context: {str(e)}")
         raise
 
-# Load the full CV at startup
-with open("cv_text.txt", "r") as f:
-    full_cv_text = f.read()
+# Load the full CV at startup with explicit UTF-8 handling
+try:
+    with open("cv_text.txt", "r", encoding="utf-8") as f:
+        full_cv_text = f.read()
+    # Ensure full_cv_text is a string
+    if not isinstance(full_cv_text, str):
+        full_cv_text = str(full_cv_text)
+    logger.info("CV text loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading cv_text.txt: {str(e)}")
+    raise
 
 async def stream_response(query, history):
     logger.info(f"Processing query: {query}")
@@ -121,20 +129,32 @@ async def stream_response(query, history):
         f"CV: {full_cv_text}"
     )
 
+    # Ensure system_prompt is a string and debug its state
+    if not isinstance(system_prompt, str):
+        system_prompt = str(system_prompt)
+    logger.info(f"System prompt type: {type(system_prompt)}, length: {len(system_prompt)}")
+
     # Combine system prompt, history, and current query
     messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": query}]
 
     # Estimate token counts and truncate history if necessary
-    system_tokens = len(generator.tokenize(system_prompt))
-    query_tokens = len(generator.tokenize(query))
-    history_tokens = [len(generator.tokenize(msg["content"])) for msg in history]
+    try:
+        system_tokens = len(generator.tokenize(system_prompt.encode('utf-8'), add_bos=True, special=True))
+        query_tokens = len(generator.tokenize(query.encode('utf-8'), add_bos=False, special=True))
+        history_tokens = [len(generator.tokenize(msg["content"].encode('utf-8'), add_bos=False, special=True)) for msg in history]
+    except Exception as e:
+        logger.error(f"Tokenization error: {str(e)}")
+        yield f"data: Sorry, I encountered a tokenization error: {str(e)}\n\n"
+        yield "data: [DONE]\n\n"
+        return
+
     total_tokens = system_tokens + query_tokens + sum(history_tokens) + len(history) * 10 + 10  # Rough estimate for formatting
 
     max_allowed_tokens = generator.n_ctx - 512 - 100  # max_tokens=512, safety_margin=100
 
     while total_tokens > max_allowed_tokens and history:
         removed_msg = history.pop(0)
-        removed_tokens = len(generator.tokenize(removed_msg["content"]))
+        removed_tokens = len(generator.tokenize(removed_msg["content"].encode('utf-8'), add_bos=False, special=True))
         total_tokens -= (removed_tokens + 10)
 
     # Reconstruct messages after possible truncation
@@ -142,21 +162,26 @@ async def stream_response(query, history):
 
     # Generate response with lock
     async with model_lock:
-        for chunk in generator.create_chat_completion(
-            messages=messages,
-            max_tokens=512,
-            stream=True,
-            temperature=0.3,
-            top_p=0.7,
-            repeat_penalty=1.2
-        ):
-            token = chunk['choices'][0]['delta'].get('content', '')
-            if token:
-                if not first_token_logged:
-                    logger.info(f"First token time: {time.time() - start_time:.2f}s")
-                    first_token_logged = True
-                yield f"data: {token}\n\n"
-        yield "data: [DONE]\n\n"
+        try:
+            for chunk in generator.create_chat_completion(
+                messages=messages,
+                max_tokens=512,
+                stream=True,
+                temperature=0.3,
+                top_p=0.7,
+                repeat_penalty=1.2
+            ):
+                token = chunk['choices'][0]['delta'].get('content', '')
+                if token:
+                    if not first_token_logged:
+                        logger.info(f"First token time: {time.time() - start_time:.2f}s")
+                        first_token_logged = True
+                    yield f"data: {token}\n\n"
+            yield "data: [DONE]\n\n"
+        except Exception as e:
+            logger.error(f"Generation error: {str(e)}")
+            yield f"data: Sorry, I encountered an error during generation: {str(e)}\n\n"
+            yield "data: [DONE]\n\n"
 
 class QueryRequest(BaseModel):
     query: str
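The substance of the tokenization fix in the second hunk is the added `.encode('utf-8')` calls: in llama-cpp-python, `Llama.tokenize` operates on `bytes`, so the old code's `str` arguments failed inside the bindings. A minimal sketch of the before/after behaviour, assuming a local GGUF file ("model.gguf" is a placeholder path, not the model used in app.py):

# Sketch of the bytes-vs-str tokenize behaviour in llama-cpp-python.
from llama_cpp import Llama

llm = Llama(model_path="model.gguf", n_ctx=2048)  # placeholder model file

text = "Tim Luka Horstmann"
# New-style call from the diff: encode to UTF-8 bytes first.
tokens = llm.tokenize(text.encode("utf-8"), add_bos=True, special=True)
print(len(tokens))  # this count feeds the total_tokens budget in app.py

# Old-style call from the diff: a plain str is rejected by the bindings,
# which is what the added .encode('utf-8') calls avoid.
try:
    llm.tokenize(text)
except Exception as e:
    print(f"str input rejected: {e}")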
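Since `stream_response` yields `data: ...\n\n` chunks terminated by `data: [DONE]\n\n`, it follows the server-sent-events convention, and the `QueryRequest` model suggests a FastAPI app. A sketch of how such a generator is typically wired up, assuming a hypothetical `/api/chat` route and an empty history (the actual endpoint is outside this diff):

# Hypothetical wiring for stream_response; route path and history handling
# are illustrative assumptions, not taken from app.py itself.
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

class QueryRequest(BaseModel):
    query: str

@app.post("/api/chat")
async def chat(request: QueryRequest):
    # stream_response is the async generator from the diff above; it yields
    # SSE-formatted strings, so text/event-stream is the appropriate media type.
    return StreamingResponse(
        stream_response(request.query, history=[]),
        media_type="text/event-stream",
    )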
cv_text.txt CHANGED

@@ -1,6 +1,7 @@
 Tim Luka Horstmann
 
 E-Mail: [email protected]
+Birthdate: 05 December 2000
 
 EDUCATION
 Sep. 2024–Oct. 2025
@@ -87,6 +88,4 @@ Awards:
 ∙ Honored as one of the best Abitur graduates of NRW, Germany’s largest federal state
 
 Interests:
-Artificial Intelligence | Space Travel | Stock Trading | Piano | Rowing | Tennis | Hiking | Travelling
-
-
+Artificial Intelligence | Space Travel | Stock Trading | Piano | Rowing | Tennis | Hiking | Travelling