Update app.py
app.py CHANGED
@@ -147,8 +147,13 @@ except Exception as e:
 def generate_response(model, tokenizer, instruction, max_new_tokens=2048):
     """Generate a response from the model based on an instruction."""
     try:
-        #
-        inputs = tokenizer.encode(
+        # Encode input with truncation
+        inputs = tokenizer.encode(
+            instruction,
+            return_tensors="pt",
+            truncation=True,
+            max_length=tokenizer.model_max_length
+        ).to(model.device)
 
         # Generate response
         outputs = model.generate(
@@ -159,13 +164,19 @@ def generate_response(model, tokenizer, instruction, max_new_tokens=2048):
             do_sample=True,
         )
 
-        # Decode and
+        # Decode and strip input prompt from response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
+        generated_text = response[len(instruction):].strip()
+
+        print(f"Instruction: {instruction}")  # Debugging line
+        print(f"Generated Response: {generated_text}")  # Debugging line
+
+        return generated_text
 
     except Exception as e:
         raise ValueError(f"Error generating response: {e}")
 
+
 @app.post("/generate")
 async def generate_text(input: ModelInput):
     try: