Tim Luka Horstmann committed on
Commit 3bbf0cd · 1 parent: 6f6e59d
Files changed (2):
  1. app.py +47 -22
  2. cv_text.txt +2 -3
app.py CHANGED
@@ -101,9 +101,17 @@ def retrieve_context(query, top_k=2):
         logger.error(f"Error in retrieve_context: {str(e)}")
         raise
 
-# Load the full CV at startup
-with open("cv_text.txt", "r", encoding="utf-8") as f:
-    full_cv_text = f.read()
+# Load the full CV at startup with explicit UTF-8 handling
+try:
+    with open("cv_text.txt", "r", encoding="utf-8") as f:
+        full_cv_text = f.read()
+    # Ensure full_cv_text is a string
+    if not isinstance(full_cv_text, str):
+        full_cv_text = str(full_cv_text)
+    logger.info("CV text loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading cv_text.txt: {str(e)}")
+    raise
 
 async def stream_response(query, history):
     logger.info(f"Processing query: {query}")
@@ -121,20 +129,32 @@ async def stream_response(query, history):
         f"CV: {full_cv_text}"
     )
 
+    # Ensure system_prompt is a string and debug its state
+    if not isinstance(system_prompt, str):
+        system_prompt = str(system_prompt)
+    logger.info(f"System prompt type: {type(system_prompt)}, length: {len(system_prompt)}")
+
     # Combine system prompt, history, and current query
     messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": query}]
 
     # Estimate token counts and truncate history if necessary
-    system_tokens = len(generator.tokenize(system_prompt))
-    query_tokens = len(generator.tokenize(query))
-    history_tokens = [len(generator.tokenize(msg["content"])) for msg in history]
+    try:
+        system_tokens = len(generator.tokenize(system_prompt.encode('utf-8'), add_bos=True, special=True))
+        query_tokens = len(generator.tokenize(query.encode('utf-8'), add_bos=False, special=True))
+        history_tokens = [len(generator.tokenize(msg["content"].encode('utf-8'), add_bos=False, special=True)) for msg in history]
+    except Exception as e:
+        logger.error(f"Tokenization error: {str(e)}")
+        yield f"data: Sorry, I encountered a tokenization error: {str(e)}\n\n"
+        yield "data: [DONE]\n\n"
+        return
+
     total_tokens = system_tokens + query_tokens + sum(history_tokens) + len(history) * 10 + 10  # Rough estimate for formatting
 
     max_allowed_tokens = generator.n_ctx - 512 - 100  # max_tokens=512, safety_margin=100
 
     while total_tokens > max_allowed_tokens and history:
         removed_msg = history.pop(0)
-        removed_tokens = len(generator.tokenize(removed_msg["content"]))
+        removed_tokens = len(generator.tokenize(removed_msg["content"].encode('utf-8'), add_bos=False, special=True))
         total_tokens -= (removed_tokens + 10)
 
     # Reconstruct messages after possible truncation
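The hunk above also changes how tokens are counted: llama-cpp-python's Llama.tokenize() takes UTF-8 bytes, not str, which is what the new .encode('utf-8') calls fix. The truncation policy itself is simple budget arithmetic: drop the oldest turns until the estimate fits n_ctx minus the 512-token generation budget and a 100-token safety margin. A self-contained sketch of the same loop, with a whitespace tokenizer standing in for the real one (the 10-token per-message overhead and the budget constants come from the diff; all names here are illustrative):

def tokenize(text):
    return text.split()  # stand-in; the real tokenizer takes UTF-8 bytes

def truncate_history(system_prompt, query, history,
                     n_ctx=4096, max_tokens=512, safety_margin=100, overhead=10):
    """Drop the oldest history messages until the estimated prompt fits."""
    history = list(history)  # avoid mutating the caller's list
    total = (len(tokenize(system_prompt)) + len(tokenize(query))
             + sum(len(tokenize(m["content"])) for m in history)
             + len(history) * overhead + overhead)
    max_allowed = n_ctx - max_tokens - safety_margin
    while total > max_allowed and history:
        removed = history.pop(0)
        total -= len(tokenize(removed["content"])) + overhead
    return history

history = [
    {"role": "user", "content": "word " * 4000},  # oversized oldest turn
    {"role": "assistant", "content": "short reply"},
]
kept = truncate_history("You are a CV assistant.", "Where did Tim study?", history)
print(len(kept))  # 1: the oversized turn was dropped

One version caveat the diff leaves untouched: recent llama-cpp-python releases expose the context size as the method Llama.n_ctx(), in which case generator.n_ctx - 512 - 100 would need parentheses after n_ctx.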
@@ -142,21 +162,26 @@ async def stream_response(query, history):
 
     # Generate response with lock
     async with model_lock:
-        for chunk in generator.create_chat_completion(
-            messages=messages,
-            max_tokens=512,
-            stream=True,
-            temperature=0.3,
-            top_p=0.7,
-            repeat_penalty=1.2
-        ):
-            token = chunk['choices'][0]['delta'].get('content', '')
-            if token:
-                if not first_token_logged:
-                    logger.info(f"First token time: {time.time() - start_time:.2f}s")
-                    first_token_logged = True
-                yield f"data: {token}\n\n"
-        yield "data: [DONE]\n\n"
+        try:
+            for chunk in generator.create_chat_completion(
+                messages=messages,
+                max_tokens=512,
+                stream=True,
+                temperature=0.3,
+                top_p=0.7,
+                repeat_penalty=1.2
+            ):
+                token = chunk['choices'][0]['delta'].get('content', '')
+                if token:
+                    if not first_token_logged:
+                        logger.info(f"First token time: {time.time() - start_time:.2f}s")
+                        first_token_logged = True
+                    yield f"data: {token}\n\n"
+            yield "data: [DONE]\n\n"
+        except Exception as e:
+            logger.error(f"Generation error: {str(e)}")
+            yield f"data: Sorry, I encountered an error during generation: {str(e)}\n\n"
+            yield "data: [DONE]\n\n"
 
 class QueryRequest(BaseModel):
     query: str
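The data: {token}\n\n framing and the [DONE] sentinel follow the Server-Sent Events wire format, and async with model_lock serializes concurrent requests so only one generation touches the model at a time. The route that serves this generator is outside these hunks; a FastAPI endpoint wrapping it would plausibly look like the sketch below (the /query path and the empty history are assumptions, and stream_response is stubbed so the snippet runs standalone):

import asyncio

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

class QueryRequest(BaseModel):
    query: str

async def stream_response(query, history):
    # Stub of the generator patched in this commit: it yields SSE-framed
    # tokens, then a terminal [DONE] event, like the real one.
    for token in ("Hello", ",", " world"):
        yield f"data: {token}\n\n"
        await asyncio.sleep(0)  # yield control between chunks
    yield "data: [DONE]\n\n"

@app.post("/query")  # route path is an assumption, not shown in the diff
async def query_endpoint(request: QueryRequest):
    return StreamingResponse(stream_response(request.query, history=[]),
                             media_type="text/event-stream")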
 
cv_text.txt CHANGED
@@ -1,6 +1,7 @@
 Tim Luka Horstmann
 
 E-Mail: [email protected]
+Birthdate: 05 December 2000
 
 EDUCATION
 Sep. 2024–Oct. 2025
@@ -87,6 +88,4 @@ Awards:
 ∙ Honored as one of the best Abitur graduates of NRW, Germany’s largest federal state
 
 Interests:
-Artificial Intelligence | Space Travel | Stock Trading | Piano | Rowing | Tennis | Hiking | Travelling
-
-
+Artificial Intelligence | Space Travel | Stock Trading | Piano | Rowing | Tennis | Hiking | Travelling
 