import gradio as gr
import torch
import sys
import traceback
import os
from huggingface_hub import login, list_repo_files, hf_hub_download


def system_info():
    try:
        import transformers

        result = []
        result.append(f"Python version: {sys.version}")
        result.append(f"PyTorch version: {torch.__version__}")
        result.append(f"Transformers version: {transformers.__version__}")

        # Check GPU availability
        if torch.cuda.is_available():
            result.append(f"GPU available: {torch.cuda.get_device_name(0)}")
            result.append(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
        else:
            result.append("No GPU available")

        result.append("System info test successful!")
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"


def list_gemma3_files():
    try:
        result = []
        result.append("Listing files in Gemma 3 repository...")

        # Get token from environment
        token = os.environ.get("HUGGINGFACE_TOKEN", "")
        if token:
            result.append(f"Token found: {token[:5]}...")
        else:
            result.append("No token found in environment variables!")
            return "\n".join(result)

        # Login to Hugging Face
        try:
            login(token=token)
            result.append("Successfully logged in to Hugging Face Hub")
        except Exception as e:
            result.append(f"Error logging in: {e}")
            return "\n".join(result)

        # List files in the repository
        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"
        result.append(f"Listing files in {model_id}...")
        files = list_repo_files(model_id, token=token)
        result.append("Files found:")
        for file in files:
            result.append(f"- {file}")

        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"


def test_gemma3():
    try:
        result = []
        result.append("Testing Gemma 3 model...")

        # Get token from environment
        token = os.environ.get("HUGGINGFACE_TOKEN", "")
        if token:
            result.append(f"Token found: {token[:5]}...")
        else:
            result.append("No token found in environment variables!")
            return "\n".join(result)

        # Login to Hugging Face
        try:
            login(token=token)
            result.append("Successfully logged in to Hugging Face Hub")
        except Exception as e:
            result.append(f"Error logging in: {e}")
            return "\n".join(result)

        # Use Gemma 3 GGUF model
        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"

        # First, list the files to find the correct filename
        result.append(f"Listing files in {model_id} to find the model file...")
        files = list_repo_files(model_id, token=token)
        gguf_files = [f for f in files if f.endswith(".gguf")]

        if not gguf_files:
            result.append("No GGUF files found in the repository!")
            return "\n".join(result)

        model_filename = gguf_files[0]  # Use the first GGUF file found
        result.append(f"Found model file: {model_filename}")

        result.append(f"Downloading {model_id}/{model_filename}...")
        model_path = hf_hub_download(
            repo_id=model_id,
            filename=model_filename,
            token=token,
        )
        result.append(f"Model downloaded to: {model_path}")

        # Load the model
        result.append("Loading model...")
        try:
            import llama_cpp
        except ImportError:
            result.append("llama-cpp-python not installed. Installing now...")
            import subprocess
            subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python"])
            import llama_cpp

        from llama_cpp import Llama

        llm = Llama(
            model_path=model_path,
            n_ctx=2048,       # Context window size
            n_gpu_layers=-1,  # Use all available GPU layers
        )

        # Generate text
        result.append("Generating text...")
        prompt = "Write a short poem about artificial intelligence."
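        # Note: llama-cpp-python's Llama.__call__ exposes an OpenAI-style
        # completion API; the generated text lives at
        # output["choices"][0]["text"], and echo=False keeps the prompt
        # itself out of the returned text. The sampling values below
        # (max_tokens, temperature, top_p) are reasonable defaults for a
        # smoke test, not tuned settings.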
        output = llm(
            prompt,
            max_tokens=100,
            temperature=0.7,
            top_p=0.95,
            echo=False,
        )

        generated_text = output["choices"][0]["text"]
        result.append(f"Generated text: {generated_text}")

        result.append("Gemma 3 test successful!")
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"


# Create Gradio interface
with gr.Blocks(title="StaffManager AI Assistant") as demo:
    gr.Markdown("# StaffManager AI Assistant")
    gr.Markdown("Testing the Gemma 3 model for the StaffManager application.")

    with gr.Tab("System Info"):
        with gr.Row():
            with gr.Column():
                info_button = gr.Button("Get System Info")
            with gr.Column():
                info_result = gr.Textbox(label="System Information", lines=10)
        info_button.click(
            fn=system_info,
            inputs=[],
            outputs=[info_result],
        )

    with gr.Tab("List Gemma 3 Files"):
        with gr.Row():
            with gr.Column():
                list_files_button = gr.Button("List Gemma 3 Files")
            with gr.Column():
                list_files_result = gr.Textbox(label="Files in Repository", lines=20)
        list_files_button.click(
            fn=list_gemma3_files,
            inputs=[],
            outputs=[list_files_result],
        )

    with gr.Tab("Gemma 3 Test"):
        with gr.Row():
            with gr.Column():
                gemma_button = gr.Button("Test Gemma 3")
            with gr.Column():
                gemma_result = gr.Textbox(label="Test Results", lines=20)
        gemma_button.click(
            fn=test_gemma3,
            inputs=[],
            outputs=[gemma_result],
        )

    with gr.Tab("About"):
        gr.Markdown("""
        ## About StaffManager AI Assistant

        This Space tests the Gemma 3 model for the StaffManager application.

        - **Gemma 3**: Google's 27B-parameter instruction-tuned model, quantization-aware trained and distributed in Q4_0 GGUF format for efficient inference

        Accessing the model requires authentication with a Hugging Face token that has been granted access to the gated repository.
        """)

# Launch the app
demo.launch()