Update app.py
app.py CHANGED

@@ -3,7 +3,7 @@ import os
 import threading
 import time
 from pathlib import Path
-from huggingface_hub import hf_hub_download,login
+from huggingface_hub import hf_hub_download, login, list_repo_files
 
 # Try to import llama-cpp-python, fallback to instructions if not available
 try:
@@ -23,9 +23,9 @@ HF_FILENAME = "mmed-llama-alpaca-q4_k_m.gguf"
 
 hf_token = os.environ.get("HF_TOKEN")
 
-
+if hf_token:
+    login(token=hf_token)
 
-login(token=hf_token)
 def find_gguf_file(directory="."):
     """Find GGUF files in the specified directory"""
     gguf_files = []
@@ -35,6 +35,19 @@ def find_gguf_file(directory="."):
                 gguf_files.append(os.path.join(root, file))
     return gguf_files
 
+def get_repo_gguf_files(repo_id=HF_REPO_ID):
+    """Get all GGUF files from the HuggingFace repository"""
+    try:
+        print(f"Fetching file list from {repo_id}...")
+        files = list_repo_files(repo_id=repo_id, token=hf_token)
+        gguf_files = [f for f in files if f.endswith('.gguf')]
+        print(f"Found {len(gguf_files)} GGUF files in repository")
+        return gguf_files, None
+    except Exception as e:
+        error_msg = f"Error fetching repository files: {str(e)}"
+        print(error_msg)
+        return [], error_msg
+
 def download_model_from_hf(repo_id=HF_REPO_ID, filename=HF_FILENAME):
     """Download GGUF model from HuggingFace Hub"""
     try:
@@ -76,7 +89,7 @@ def get_optimal_settings():
 
     return n_threads, n_gpu_layers
 
-def load_model_from_gguf(gguf_path=None, n_ctx=2048, use_hf_download=True):
+def load_model_from_gguf(gguf_path=None, filename=None, n_ctx=2048, use_hf_download=True):
    """Load the model from a GGUF file with automatic optimization"""
     global model, model_loaded
 
@@ -87,8 +100,10 @@ def load_model_from_gguf(gguf_path=None, n_ctx=2048, use_hf_download=True):
     # If no path provided, try different approaches
     if gguf_path is None:
         if use_hf_download:
+            # Use the specified filename or default
+            selected_filename = filename if filename else HF_FILENAME
             # Try to download from HuggingFace first
-            gguf_path, error = download_model_from_hf()
+            gguf_path, error = download_model_from_hf(filename=selected_filename)
             if error:
                 return False, f"❌ Failed to download from HuggingFace: {error}"
         else:
@@ -123,8 +138,9 @@ def load_model_from_gguf(gguf_path=None, n_ctx=2048, use_hf_download=True):
         )
 
         model_loaded = True
+        selected_filename = filename if filename else os.path.basename(gguf_path)
         print("Model loaded successfully!")
-        return True, f"✅ Model loaded successfully
+        return True, f"✅ Model loaded successfully: {selected_filename}\n📊 Context: {n_ctx} tokens\n🖥️ CPU Threads: {n_threads}\n🎮 GPU Layers: {n_gpu_layers}\n📦 Source: {HF_REPO_ID}"
 
     except Exception as e:
         model_loaded = False
@@ -195,11 +211,25 @@ def clear_chat():
     """Clear the chat history"""
     return [], ""
 
-def load_model_interface(context_size,
+def load_model_interface(context_size, selected_model):
     """Interface function to load model with configurable context size"""
-    success, message = load_model_from_gguf(gguf_path=None, n_ctx=int(context_size), use_hf_download=
+    success, message = load_model_from_gguf(gguf_path=None, filename=selected_model, n_ctx=int(context_size), use_hf_download=True)
     return message
 
+def refresh_model_list():
+    """Refresh the list of available GGUF models from the repository"""
+    gguf_files, error = get_repo_gguf_files()
+    if error:
+        return gr.Dropdown(choices=["Error loading models"], value="Error loading models")
+
+    if not gguf_files:
+        return gr.Dropdown(choices=["No GGUF files found"], value="No GGUF files found")
+
+    # Set default value to the original default file if it exists
+    default_value = HF_FILENAME if HF_FILENAME in gguf_files else gguf_files[0]
+
+    return gr.Dropdown(choices=gguf_files, value=default_value)
+
 def get_available_gguf_files():
     """Get list of available GGUF files"""
     gguf_files = find_gguf_file()
@@ -220,6 +250,15 @@ def create_interface():
     # Check for available models
     availability_status = check_model_availability()
 
+    # Get initial list of GGUF files from repository
+    gguf_files, error = get_repo_gguf_files()
+    if error or not gguf_files:
+        initial_choices = ["Error loading models" if error else "No GGUF files found"]
+        initial_value = initial_choices[0]
+    else:
+        initial_choices = gguf_files
+        initial_value = HF_FILENAME if HF_FILENAME in gguf_files else gguf_files[0]
+
     with gr.Blocks(title="MMed-Llama-Alpaca GGUF Chatbot", theme=gr.themes.Soft()) as demo:
         gr.HTML("""
         <h1 style="text-align: center; color: #2E86AB; margin-bottom: 30px;">
@@ -256,15 +295,16 @@ def create_interface():
                 # Model loading section
                 gr.HTML("<h3>🔧 Model Control</h3>")
 
-                #
-                use_hf_download = gr.Checkbox(
-                    value=True,
-                    label="Download from HuggingFace",
-                    info="Uncheck to use local GGUF files"
-                )
+                # gr.HTML(f"<p style='font-size: 0.9em; color: #666;'><strong>Repository:</strong> {HF_REPO_ID}</p>")
 
-
-
+                # Model selection dropdown
+                model_dropdown = gr.Dropdown(
+                    choices=initial_choices,
+                    value=initial_value,
+                    label="Select GGUF Model",
+                    info="Choose from available models in the repository",
+                    interactive=True
+                )
 
                 load_btn = gr.Button("Load Model", variant="primary", size="lg")
                 model_status = gr.Textbox(
@@ -346,7 +386,7 @@ def create_interface():
         # Event handlers
         load_btn.click(
             load_model_interface,
-            inputs=[context_size,
+            inputs=[context_size, model_dropdown],
             outputs=model_status
         )
 
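
This commit replaces the old "Download from HuggingFace" checkbox with a dropdown populated from the repository itself, via the new get_repo_gguf_files helper. That listing step is easy to sanity-check outside the Space. A minimal sketch, assuming huggingface_hub is installed; the repo id below is a placeholder, since the diff never shows the actual value of HF_REPO_ID:

import os
from huggingface_hub import list_repo_files

HF_REPO_ID = "your-org/your-gguf-repo"  # placeholder; the Space defines its own constant
hf_token = os.environ.get("HF_TOKEN")   # only needed for private or gated repos

# Mirror get_repo_gguf_files: list every file in the repo, keep only .gguf entries
files = list_repo_files(repo_id=HF_REPO_ID, token=hf_token)
gguf_files = [f for f in files if f.endswith(".gguf")]
print(f"Found {len(gguf_files)} GGUF file(s): {gguf_files}")

The (result, error) tuple convention that get_repo_gguf_files shares with download_model_from_hf keeps the Gradio callbacks free of their own try/except blocks; they simply branch on the error string.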
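
One loose end: the hunks shown here define refresh_model_list but never attach it to a component, so the dropdown is populated only once, at interface build time, unless the wiring lives in a part of app.py this diff does not touch. The usual Gradio wiring would look like the sketch below, where refresh_btn is hypothetical and not part of this commit:

# Hypothetical wiring, not present in the shown hunks
refresh_btn = gr.Button("🔄 Refresh Model List")
refresh_btn.click(refresh_model_list, inputs=None, outputs=model_dropdown)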