Update app.py
app.py CHANGED
@@ -8,7 +8,7 @@ model_file_path = hf_hub_download(
     filename="llama-2-7b.Q4_0.gguf"
 )
 
-#
+# Initialize the Llama model
 try:
     llm_llama_cpp = Llama(
         model_path=model_file_path,  # Path where the model is downloaded
@@ -16,26 +16,28 @@ try:
         n_ctx=4096  # Set context window to match model's full capacity
     )
 
-    #
+    # Define the function for generating text with streaming
     def talk(prompt, history):
         try:
-            # Generate text with streaming
             response_stream = llm_llama_cpp.create_completion(
                 prompt,
                 max_tokens=200,  # You can adjust the max tokens as needed
                 stream=True
             )
 
+            # Prepare a response variable to store the final result
             response = ""
             for chunk in response_stream:
-                # Extract the text from
+                # Extract and accumulate the text from each chunk
                 if 'choices' in chunk and len(chunk['choices']) > 0 and 'text' in chunk['choices'][0]:
                     response += chunk['choices'][0]['text']
+                    print(f"Streaming: {chunk['choices'][0]['text']}", end="", flush=True)
 
-            #
+            # After the stream is complete, return the final response
             return response
 
         except Exception as e:
+            print(f"Error in generating response: {e}")
             return f"Error with llama-cpp-python: {e}"
 
 except FileNotFoundError:
@@ -47,7 +49,7 @@ except Exception as e:
 TITLE = "AI Copilot for Diabetes Patients"
 DESCRIPTION = "I provide answers to concerns related to Diabetes"
 
-# Design chatbot interface (fixed `likeable` argument)
+# Design chatbot interface (fixed `likeable` argument, deprecated params removed)
 demo = gr.ChatInterface(
     fn=talk,  # The function that processes user input and returns the response
     chatbot=gr.Chatbot(
@@ -55,7 +57,7 @@ demo = gr.ChatInterface(
         show_share_button=True,
         show_copy_button=True,
         layout="bubble",  # Display messages in bubble format
-
+        type="messages",  # Use OpenAI-style message format
     ),
     theme="Soft",  # Soft theme for the UI
     examples=[["what is Diabetes?"]],  # Example query to get started
@@ -64,4 +66,4 @@ demo = gr.ChatInterface(
 )
 
 # Launch the chatbot interface
-demo.launch()
+demo.launch()
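For reference, a minimal, self-contained sketch of how the updated script could fit together is shown below. The imports and the `repo_id` are assumptions, since this diff does not show them, and the file's try/except blocks and debug `print` calls are omitted to keep the sketch short.

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the GGUF model file; the repo_id is assumed, the diff only shows the filename
model_file_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",
    filename="llama-2-7b.Q4_0.gguf"
)

# Load the model with the full 4096-token context window
llm_llama_cpp = Llama(model_path=model_file_path, n_ctx=4096)

def talk(prompt, history):
    # Stream completion chunks and accumulate them into a single response string
    response_stream = llm_llama_cpp.create_completion(prompt, max_tokens=200, stream=True)
    response = ""
    for chunk in response_stream:
        if chunk.get("choices") and "text" in chunk["choices"][0]:
            response += chunk["choices"][0]["text"]
    return response

# Chat UI using the OpenAI-style message format introduced in the diff
demo = gr.ChatInterface(
    fn=talk,
    chatbot=gr.Chatbot(layout="bubble", type="messages"),
    title="AI Copilot for Diabetes Patients",
    description="I provide answers to concerns related to Diabetes",
    examples=[["what is Diabetes?"]],
)
demo.launch()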