segestic committed on
Commit e95d4c9 · verified · 1 Parent(s): 2ea7645

Update app.py

Files changed (1)
  app.py +56 -48
app.py CHANGED
@@ -1,69 +1,77 @@
  import gradio as gr
  from llama_cpp import Llama
  from huggingface_hub import hf_hub_download

- # Download model from Hugging Face (if not already present)
- model_file_path = hf_hub_download(
-     repo_id="TheBloke/Llama-2-7B-GGUF",
-     filename="llama-2-7b.Q4_0.gguf"
- )

- # Initialize the Llama model
- try:
-     llm_llama_cpp = Llama(
-         model_path=model_file_path,  # Path where the model is downloaded
-         verbose=False,               # Suppress llama.cpp's own informational prints
-         n_ctx=4096                   # Set context window to match model's full capacity
-     )

-     # Define the function for generating text with streaming
-     def talk(prompt, history):
-         try:
-             response_stream = llm_llama_cpp.create_completion(
-                 prompt,
-                 max_tokens=200,  # You can adjust the max tokens as needed
-                 stream=True
-             )

-             # Prepare a response variable to store the final result
-             response = ""
-             for chunk in response_stream:
-                 # Extract and accumulate the text from each chunk
-                 if 'choices' in chunk and len(chunk['choices']) > 0 and 'text' in chunk['choices'][0]:
-                     response += chunk['choices'][0]['text']
-                     print(f"Streaming: {chunk['choices'][0]['text']}", end="", flush=True)

-             # After the stream is complete, return the final response
-             return response

-         except Exception as e:
-             print(f"Error in generating response: {e}")
-             return f"Error with llama-cpp-python: {e}"

- except FileNotFoundError:
-     print(f"Error: Model file not found at {model_file_path}")
- except Exception as e:
-     print(f"Error with llama-cpp-python: {e}")

- # Gradio interface setup
- TITLE = "AI Copilot for Diabetes Patients"
- DESCRIPTION = "I provide answers to concerns related to Diabetes"

- # Design chatbot interface (fixed `likeable` argument, deprecated params removed)
  demo = gr.ChatInterface(
-     fn=talk,  # The function that processes user input and returns the response
      chatbot=gr.Chatbot(
          show_label=True,
          show_share_button=True,
          show_copy_button=True,
-         layout="bubble",  # Display messages in bubble format
-         type="messages",  # Use OpenAI-style message format
      ),
-     theme="Soft",  # Soft theme for the UI
-     examples=[["what is Diabetes?"]],  # Example query to get started
-     title=TITLE,  # Title of the interface
-     description=DESCRIPTION,  # Description for context
  )

- # Launch the chatbot interface
  demo.launch()
 
  import gradio as gr
  from llama_cpp import Llama
  from huggingface_hub import hf_hub_download
+ import threading

+ # Title and description
+ TITLE = "AI Copilot for Diabetes Patients"
+ DESCRIPTION = "I provide answers to concerns related to Diabetes"

+ # Globals
+ llm_llama_cpp = None
+ model_ready = False

+ # Download and initialize model in background
+ def load_model():
+     global llm_llama_cpp, model_ready
+     try:
+         print("Downloading model...")
+         model_file_path = hf_hub_download(
+             repo_id="TheBloke/Llama-2-7B-GGUF",
+             filename="llama-2-7b.Q4_0.gguf"
+         )

+         print("Initializing model...")
+         llm_llama_cpp = Llama(
+             model_path=model_file_path,
+             verbose=False,
+             n_ctx=4096
+         )
+         model_ready = True
+         print("Model is ready.")
+     except Exception as e:
+         print(f"Failed to load model: {e}")

+ # Background thread for model loading
+ threading.Thread(target=load_model).start()

+ # Chatbot logic
+ def talk(prompt, history):
+     if not model_ready:
+         return "⏳ Please wait, the model is still loading..."
+
+     try:
+         response = ""
+         response_stream = llm_llama_cpp.create_completion(
+             prompt=prompt,
+             max_tokens=200,
+             stream=True
+         )

+         for chunk in response_stream:
+             if 'choices' in chunk and 'text' in chunk['choices'][0]:
+                 response += chunk['choices'][0]['text']
+         return response

+     except Exception as e:
+         print(f"Error in generating response: {e}")
+         return f"Error during response generation: {e}"

+ # Gradio interface
  demo = gr.ChatInterface(
+     fn=talk,
      chatbot=gr.Chatbot(
          show_label=True,
          show_share_button=True,
          show_copy_button=True,
+         layout="bubble",
+         type="messages",
      ),
+     theme="Soft",
+     examples=[["what is Diabetes?"]],
+     title=TITLE,
+     description=DESCRIPTION,
  )

+ # Launch the UI
  demo.launch()
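
A note on the background loader: the bare model_ready boolean is only flipped after Llama(...) returns, which works here, but a threading.Event makes the ready/not-ready handoff explicit, and a daemon thread keeps a stalled download from blocking interpreter shutdown. A minimal sketch of that variant, reusing the repo and filename from the committed code (the model_ready_event name is illustrative, not part of the commit):

# Sketch only: explicit readiness signalling for the background loader.
import threading

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

llm_llama_cpp = None
model_ready_event = threading.Event()  # illustrative replacement for the model_ready bool

def load_model():
    global llm_llama_cpp
    try:
        model_file_path = hf_hub_download(
            repo_id="TheBloke/Llama-2-7B-GGUF",
            filename="llama-2-7b.Q4_0.gguf"
        )
        llm_llama_cpp = Llama(model_path=model_file_path, verbose=False, n_ctx=4096)
        model_ready_event.set()  # only signal ready once the model is usable
    except Exception as e:
        print(f"Failed to load model: {e}")

# daemon=True: a stalled download can no longer keep the process alive at exit
threading.Thread(target=load_model, daemon=True).start()

talk would then check model_ready_event.is_set() instead of the boolean flag.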
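
Both the old and new versions of talk accumulate the streamed chunks and return the full string, so the chat window only updates once the completion finishes; the new version also drops the len(chunk['choices']) > 0 guard that the old version had. Since gr.ChatInterface accepts a generator function and re-renders the bot message on each yield, a streaming variant could look roughly like this (a sketch against the same llm_llama_cpp / model_ready globals as the committed file):

# Sketch only: a generator-based talk() so partial text reaches the UI as it arrives.
def talk(prompt, history):
    if not model_ready:
        yield "⏳ Please wait, the model is still loading..."
        return

    response = ""
    try:
        for chunk in llm_llama_cpp.create_completion(prompt=prompt, max_tokens=200, stream=True):
            choices = chunk.get("choices", [])
            # Keep the length guard from the old version to avoid an IndexError
            # on an empty choices list.
            if choices and "text" in choices[0]:
                response += choices[0]["text"]
                yield response  # yield the cumulative text; Gradio replaces the shown message
    except Exception as e:
        yield f"Error during response generation: {e}"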