import gradio as gr
import torch
import sys
import traceback
import os
from huggingface_hub import login, list_repo_files, hf_hub_download
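
# The helpers below read the access token from the HUGGINGFACE_TOKEN
# environment variable. On a Hugging Face Space this would typically be set
# as a repository secret; for local runs, an assumed shell setup would be:
#   export HUGGINGFACE_TOKEN=hf_...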


def system_info():
    try:
        import transformers

        result = []
        result.append(f"Python version: {sys.version}")
        result.append(f"PyTorch version: {torch.__version__}")
        result.append(f"Transformers version: {transformers.__version__}")

        # Check GPU availability
        if torch.cuda.is_available():
            result.append(f"GPU available: {torch.cuda.get_device_name(0)}")
            result.append(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
        else:
            result.append("No GPU available")

        result.append("System info test successful!")
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"


def list_gemma3_files():
    try:
        result = []
        result.append("Listing files in Gemma 3 repository...")

        # Get the token from the environment
        token = os.environ.get("HUGGINGFACE_TOKEN", "")
        if token:
            result.append(f"Token found: {token[:5]}...")
        else:
            result.append("No token found in environment variables!")
            return "\n".join(result)

        # Log in to Hugging Face
        try:
            login(token=token)
            result.append("Successfully logged in to Hugging Face Hub")
        except Exception as e:
            result.append(f"Error logging in: {e}")
            return "\n".join(result)

        # List the files in the repository
        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"
        result.append(f"Listing files in {model_id}...")
        files = list_repo_files(model_id, token=token)
        result.append("Files found:")
        for file in files:
            result.append(f"- {file}")
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"


def test_gemma3():
    try:
        result = []
        result.append("Testing Gemma 3 model...")

        # Get the token from the environment
        token = os.environ.get("HUGGINGFACE_TOKEN", "")
        if token:
            result.append(f"Token found: {token[:5]}...")
        else:
            result.append("No token found in environment variables!")
            return "\n".join(result)

        # Log in to Hugging Face
        try:
            login(token=token)
            result.append("Successfully logged in to Hugging Face Hub")
        except Exception as e:
            result.append(f"Error logging in: {e}")
            return "\n".join(result)

        # Use the Gemma 3 GGUF model; list the repo files first to find the
        # correct filename
        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"
        result.append(f"Listing files in {model_id} to find the model file...")
        files = list_repo_files(model_id, token=token)
        gguf_files = [f for f in files if f.endswith('.gguf')]
        if not gguf_files:
            result.append("No GGUF files found in the repository!")
            return "\n".join(result)

        model_filename = gguf_files[0]  # Use the first GGUF file found
        result.append(f"Found model file: {model_filename}")
result.append(f"Downloading {model_id}/{model_filename}...")
from huggingface_hub import hf_hub_download
model_path = hf_hub_download(
repo_id=model_id,
filename=model_filename,
token=token
)
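        # hf_hub_download caches files (by default under ~/.cache/huggingface),
        # so repeated test runs reuse the local copy instead of re-downloading
        # the multi-gigabyte model.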
result.append(f"Model downloaded to: {model_path}")
# Load the model
result.append("Loading model...")
try:
import llama_cpp
except ImportError:
result.append("llama-cpp-python not installed. Installing now...")
import subprocess
subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python"])
import llama_cpp
from llama_cpp import Llama
llm = Llama(
model_path=model_path,
n_ctx=2048, # Context window size
n_gpu_layers=-1 # Use all available GPU layers
)

        # Generate text
        result.append("Generating text...")
        prompt = "Write a short poem about artificial intelligence."
        output = llm(
            prompt,
            max_tokens=100,   # Cap the length of the completion
            temperature=0.7,  # Moderate sampling randomness
            top_p=0.95,       # Nucleus sampling cutoff
            echo=False        # Don't repeat the prompt in the output
        )
        generated_text = output["choices"][0]["text"]
        result.append(f"Generated text: {generated_text}")

        result.append("Gemma 3 test successful!")
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"


# Create the Gradio interface
with gr.Blocks(title="StaffManager AI Assistant") as demo:
    gr.Markdown("# StaffManager AI Assistant")
    gr.Markdown("Testing the Gemma 3 model for the StaffManager application.")

    with gr.Tab("System Info"):
        with gr.Row():
            with gr.Column():
                info_button = gr.Button("Get System Info")
            with gr.Column():
                info_result = gr.Textbox(label="System Information", lines=10)
        info_button.click(
            fn=system_info,
            inputs=[],
            outputs=[info_result]
        )

    with gr.Tab("List Gemma 3 Files"):
        with gr.Row():
            with gr.Column():
                list_files_button = gr.Button("List Gemma 3 Files")
            with gr.Column():
                list_files_result = gr.Textbox(label="Files in Repository", lines=20)
        list_files_button.click(
            fn=list_gemma3_files,
            inputs=[],
            outputs=[list_files_result]
        )

    with gr.Tab("Gemma 3 Test"):
        with gr.Row():
            with gr.Column():
                gemma_button = gr.Button("Test Gemma 3")
            with gr.Column():
                gemma_result = gr.Textbox(label="Test Results", lines=20)
        gemma_button.click(
            fn=test_gemma3,
            inputs=[],
            outputs=[gemma_result]
        )

    with gr.Tab("About"):
        gr.Markdown("""
        ## About StaffManager AI Assistant

        This Space tests the Gemma 3 model for the StaffManager application.

        - **Gemma 3**: Google's 27B-parameter model in GGUF format for efficient inference

        The model requires authentication with a Hugging Face token that has
        been granted access to it.
        """)

# Launch the app
demo.launch()