# chatcpu / app.py — Hugging Face Space: CPU chatbot serving a GGUF Llama-2 model via llama.cpp.
# (Header reconstructed from Hub page-scrape artifacts: "segestic's picture / Update app.py /
#  f0be88b verified / raw / history blame / 2.33 kB" — these were UI chrome, not code.)
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Download the GGUF model weights from the Hugging Face Hub.
# hf_hub_download caches under ~/.cache/huggingface, so the file is only
# fetched on the first run; subsequent runs resolve to the cached path.
model_file_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",
    filename="llama-2-7b.Q4_0.gguf"
)
# Load the model once at startup. On failure we keep the app alive and
# surface the error through the chat UI instead of crashing.
#
# BUG FIX: the original defined `talk` *inside* this try-block, so a failed
# model load printed an error, fell through, and then `gr.ChatInterface(fn=talk)`
# raised NameError. `talk` is now defined unconditionally at module level.
llm_llama_cpp = None
try:
    llm_llama_cpp = Llama(
        model_path=model_file_path,  # Path where the model is downloaded
        verbose=False,               # Suppress llama.cpp's own informational prints
        n_ctx=4096,                  # Set context window to match model's full capacity
    )
except FileNotFoundError:
    print(f"Error: Model file not found at {model_file_path}")
except Exception as e:
    print(f"Error with llama-cpp-python: {e}")


def talk(prompt, history):
    """Generate a completion for *prompt* with the loaded Llama model.

    Args:
        prompt: The user's message text.
        history: Prior chat turns supplied by gr.ChatInterface (unused here;
            each prompt is answered statelessly).

    Returns:
        The generated text, or an error message string if generation (or the
        earlier model load) failed — ChatInterface displays either as a reply.
    """
    if llm_llama_cpp is None:
        # Model failed to load at startup; the cause was printed to the server log.
        return "Error: model is not loaded; check the server logs."
    try:
        # Stream tokens and accumulate them into one response string.
        response_stream = llm_llama_cpp.create_completion(
            prompt,
            max_tokens=200,  # You can adjust the max tokens as needed
            stream=True,
        )
        response = ""
        for chunk in response_stream:
            # Each streamed chunk carries its text under choices[0]['text'];
            # guard against malformed/empty chunks before appending.
            if 'choices' in chunk and len(chunk['choices']) > 0 and 'text' in chunk['choices'][0]:
                response += chunk['choices'][0]['text']
        return response
    except Exception as e:
        return f"Error with llama-cpp-python: {e}"
# --- Gradio chat UI -----------------------------------------------------
TITLE = "AI Copilot for Diabetes Patients"
DESCRIPTION = "I provide answers to concerns related to Diabetes"

# Transcript widget: bubble-style messages with share/copy controls.
_chat_window = gr.Chatbot(
    show_label=True,
    show_share_button=True,
    show_copy_button=True,
    layout="bubble",
    # NOTE(review): `bubble_full_width` is deprecated in Gradio 4.x and removed
    # in 5.x — confirm the pinned Gradio version still accepts it.
    bubble_full_width=False,
)

# Wire the chat widget to the `talk` handler defined above.
demo = gr.ChatInterface(
    fn=talk,
    chatbot=_chat_window,
    theme="Soft",
    examples=[["what is Diabetes?"]],
    title=TITLE,
    description=DESCRIPTION,
)

# Start the web server for the Space.
demo.launch()