Update app.py
app.py CHANGED
@@ -1,4 +1,5 @@
 import gradio as gr
+import time
 from optimum.intel import OVModelForCausalLM
 from transformers import AutoTokenizer, pipeline
 
@@ -12,6 +13,9 @@ pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 def respond(message):
     try:
+        # Record the start time
+        start_time = time.time()
+
         # Only use the current message as input (no history)
         input_text = f"User: {message}"
 
@@ -26,6 +30,10 @@ def respond(message):
         )
         reply = response[0]['generated_text'].strip()
 
+        # Calculate inference time
+        inference_time = time.time() - start_time
+        print(f"Inference time: {inference_time:.4f} seconds")
+
         # Return as a tuple (user message, bot reply)
         return [(message, reply)]
 
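For context, below is a minimal sketch of the full respond path after this commit, assembled from the diff. The model id, the generation arguments, the except branch, and the Gradio wiring are all elided by the diff, so the placeholders here are assumptions, not the app's actual values.

import time

import gradio as gr
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

# Placeholder model id -- the diff does not show which model the app loads.
MODEL_ID = "path/to/your-openvino-model"

model = OVModelForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def respond(message):
    try:
        # Record the start time
        start_time = time.time()

        # Only use the current message as input (no history)
        input_text = f"User: {message}"

        # Generation arguments are elided in the diff; max_new_tokens
        # here is an assumption.
        response = pipe(input_text, max_new_tokens=128)
        reply = response[0]['generated_text'].strip()

        # Calculate inference time
        inference_time = time.time() - start_time
        print(f"Inference time: {inference_time:.4f} seconds")

        # Return as a tuple (user message, bot reply)
        return [(message, reply)]
    except Exception as e:
        # The except branch is not shown in the diff; surfacing the error
        # in the chat window is one reasonable guess.
        return [(message, f"Error: {e}")]

# Assumed Gradio wiring for a single-turn chat window.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Message")
    msg.submit(respond, inputs=msg, outputs=chatbot)

demo.launch()

One design note: time.time() reports wall-clock time and can jump if the system clock is adjusted, so time.perf_counter() is the standard choice for measuring elapsed intervals and would be a drop-in replacement here.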