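"""StaffManager AI Assistant.

A Gradio Space that smoke-tests the Gemma 3 27B instruction-tuned GGUF model
(google/gemma-3-27b-it-qat-q4_0-gguf). Requires a HUGGINGFACE_TOKEN
environment variable for an account that has been granted access to the
gated Gemma repository.
"""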
import gradio as gr
import torch
import sys
import traceback
import os
from huggingface_hub import login, list_repo_files, hf_hub_download

def system_info():
    try:
        import transformers
        
        result = []
        result.append(f"Python version: {sys.version}")
        result.append(f"PyTorch version: {torch.__version__}")
        result.append(f"Transformers version: {transformers.__version__}")
        
        # Check GPU availability
        if torch.cuda.is_available():
            result.append(f"GPU available: {torch.cuda.get_device_name(0)}")
            result.append(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
        else:
            result.append("No GPU available")
            
        result.append("System info test successful!")
        
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"

def list_gemma3_files():
    try:
        result = []
        result.append("Listing files in Gemma 3 repository...")
        
        # Get token from environment
        token = os.environ.get("HUGGINGFACE_TOKEN", "")
        if token:
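            # Log only a short prefix so the full secret never appears in output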
            result.append(f"Token found: {token[:5]}...")
        else:
            result.append("No token found in environment variables!")
            return "\n".join(result)
        
        # Login to Hugging Face
        try:
            login(token=token)
            result.append("Successfully logged in to Hugging Face Hub")
        except Exception as e:
            result.append(f"Error logging in: {e}")
            return "\n".join(result)
        
        # List files in the repository
        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"
        result.append(f"Listing files in {model_id}...")
        
        files = list_repo_files(model_id, token=token)
        result.append("Files found:")
        for file in files:
            result.append(f"- {file}")
        
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"

def test_gemma3():
    try:
        result = []
        result.append("Testing Gemma 3 model...")
        
        # Get token from environment
        token = os.environ.get("HUGGINGFACE_TOKEN", "")
        if token:
            result.append(f"Token found: {token[:5]}...")
        else:
            result.append("No token found in environment variables!")
            return "\n".join(result)
        
        # Login to Hugging Face
        try:
            login(token=token)
            result.append("Successfully logged in to Hugging Face Hub")
        except Exception as e:
            result.append(f"Error logging in: {e}")
            return "\n".join(result)
        
        # Use Gemma 3 GGUF model
        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"
        
        # First, list the files to find the correct filename
        result.append(f"Listing files in {model_id} to find the model file...")
        files = list_repo_files(model_id, token=token)
        gguf_files = [f for f in files if f.endswith('.gguf')]
        
        if not gguf_files:
            result.append("No GGUF files found in the repository!")
            return "\n".join(result)
        
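        # Assumption: the repo hosts a single-file GGUF; a sharded model
        # (e.g. *-00001-of-000NN.gguf) would need every shard, not just the first.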
        model_filename = gguf_files[0]  # Use the first GGUF file found
        result.append(f"Found model file: {model_filename}")
        
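        # hf_hub_download caches the file in the Hugging Face cache directory,
        # so repeat runs reuse the multi-gigabyte download instead of fetching it again.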
        result.append(f"Downloading {model_id}/{model_filename}...")
        from huggingface_hub import hf_hub_download
        
        model_path = hf_hub_download(
            repo_id=model_id,
            filename=model_filename,
            token=token
        )
        result.append(f"Model downloaded to: {model_path}")
        
        # Load the model
        result.append("Loading model...")
        try:
            import llama_cpp
        except ImportError:
            result.append("llama-cpp-python not installed. Installing now...")
            import subprocess
            subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python"])
            # Invalidate import caches so the just-installed package can be found
            import importlib
            importlib.invalidate_caches()
            import llama_cpp
        
        from llama_cpp import Llama
        
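        # n_gpu_layers=-1 asks llama.cpp to offload all layers to the GPU; this
        # only takes effect if llama-cpp-python was built with CUDA support
        # (the default PyPI wheel is CPU-only, in which case the flag is ignored).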
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,  # Context window size
            n_gpu_layers=-1  # Use all available GPU layers
        )
        
        # Generate text
        result.append("Generating text...")
        prompt = "Write a short poem about artificial intelligence."
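        # Note: this is a raw completion call. Since gemma-3-27b-it is
        # instruction-tuned, llm.create_chat_completion(
        #     messages=[{"role": "user", "content": prompt}])
        # would let llama-cpp-python apply the model's chat template (read from
        # the GGUF metadata, when present), which usually yields better-formed
        # answers; the plain call below is enough for a smoke test.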
        
        output = llm(
            prompt,
            max_tokens=100,
            temperature=0.7,
            top_p=0.95,
            echo=False
        )
        
        generated_text = output["choices"][0]["text"]
        result.append(f"Generated text: {generated_text}")
        result.append("Gemma 3 test successful!")
        
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"

# Create Gradio interface
with gr.Blocks(title="StaffManager AI Assistant") as demo:
    gr.Markdown("# StaffManager AI Assistant")
    gr.Markdown("Testing Gemma 3 model for StaffManager application.")
    
    with gr.Tab("System Info"):
        with gr.Row():
            with gr.Column():
                info_button = gr.Button("Get System Info")
            with gr.Column():
                info_result = gr.Textbox(label="System Information", lines=10)
        
        info_button.click(
            fn=system_info,
            inputs=[],
            outputs=[info_result]
        )
    
    with gr.Tab("List Gemma 3 Files"):
        with gr.Row():
            with gr.Column():
                list_files_button = gr.Button("List Gemma 3 Files")
            with gr.Column():
                list_files_result = gr.Textbox(label="Files in Repository", lines=20)
        
        list_files_button.click(
            fn=list_gemma3_files,
            inputs=[],
            outputs=[list_files_result]
        )
    
    with gr.Tab("Gemma 3 Test"):
        with gr.Row():
            with gr.Column():
                gemma_button = gr.Button("Test Gemma 3")
            with gr.Column():
                gemma_result = gr.Textbox(label="Test Results", lines=20)
        
        gemma_button.click(
            fn=test_gemma3,
            inputs=[],
            outputs=[gemma_result]
        )
    
    with gr.Tab("About"):
        gr.Markdown("""
        ## About StaffManager AI Assistant
        
        This Space tests the Gemma 3 model for the StaffManager application.
        
        - **Gemma 3**: Google's 27B-parameter, instruction-tuned model in quantized (Q4_0) GGUF format for efficient inference
        
        This model requires authentication with a Hugging Face token that has been granted access to the model.
        """)

# Launch the app
demo.launch()