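"""StaffManager AI Assistant.

A Gradio Space that smoke-tests the Gemma 3 27B instruction-tuned GGUF model
(google/gemma-3-27b-it-qat-q4_0-gguf). Requires a HUGGINGFACE_TOKEN
environment variable for an account that has been granted access to the
gated Gemma repository.
"""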
import gradio as gr
import torch
import sys
import traceback
import os
from huggingface_hub import login, list_repo_files, hf_hub_download

def system_info():
    try:
        import transformers
        
        result = []
        result.append(f"Python version: {sys.version}")
        result.append(f"PyTorch version: {torch.__version__}")
        result.append(f"Transformers version: {transformers.__version__}")
        
        # Check GPU availability
        if torch.cuda.is_available():
            result.append(f"GPU available: {torch.cuda.get_device_name(0)}")
            result.append(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
        else:
            result.append("No GPU available")
            
        result.append("System info test successful!")
        
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"

def list_gemma3_files():
    try:
        result = []
        result.append("Listing files in Gemma 3 repository...")
        
        # Get token from environment
        token = os.environ.get("HUGGINGFACE_TOKEN", "")
        if token:
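            # Log only a short prefix so the full secret never appears in output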
            result.append(f"Token found: {token[:5]}...")
        else:
            result.append("No token found in environment variables!")
            return "\n".join(result)
        
        # Login to Hugging Face
        try:
            login(token=token)
            result.append("Successfully logged in to Hugging Face Hub")
        except Exception as e:
            result.append(f"Error logging in: {e}")
            return "\n".join(result)
        
        # List files in the repository
        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"
        result.append(f"Listing files in {model_id}...")
        
        files = list_repo_files(model_id, token=token)
        result.append("Files found:")
        for file in files:
            result.append(f"- {file}")
        
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"

def test_gemma3():
    try:
        result = []
        result.append("Testing Gemma 3 model...")
        
        # Get token from environment
        token = os.environ.get("HUGGINGFACE_TOKEN", "")
        if token:
            result.append(f"Token found: {token[:5]}...")
        else:
            result.append("No token found in environment variables!")
            return "\n".join(result)
        
        # Login to Hugging Face
        try:
            login(token=token)
            result.append("Successfully logged in to Hugging Face Hub")
        except Exception as e:
            result.append(f"Error logging in: {e}")
            return "\n".join(result)
        
        # Use Gemma 3 GGUF model
        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"
        
        # First, list the files to find the correct filename
        result.append(f"Listing files in {model_id} to find the model file...")
        files = list_repo_files(model_id, token=token)
        gguf_files = [f for f in files if f.endswith('.gguf')]
        
        if not gguf_files:
            result.append("No GGUF files found in the repository!")
            return "\n".join(result)
        
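        # Assumption: the repo hosts a single-file GGUF; a sharded model
        # (e.g. *-00001-of-000NN.gguf) would need every shard, not just the first.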
        model_filename = gguf_files[0]  # Use the first GGUF file found
        result.append(f"Found model file: {model_filename}")
        
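        # hf_hub_download caches the file in the Hugging Face cache directory,
        # so repeat runs reuse the multi-gigabyte download instead of fetching it again.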
        result.append(f"Downloading {model_id}/{model_filename}...")
        from huggingface_hub import hf_hub_download
        
        model_path = hf_hub_download(
            repo_id=model_id,
            filename=model_filename,
            token=token
        )
        result.append(f"Model downloaded to: {model_path}")
        
        # Load the model
        result.append("Loading model...")
        try:
            import llama_cpp
        except ImportError:
            result.append("llama-cpp-python not installed. Installing now...")
            import subprocess
            subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python"])
            # Invalidate import caches so the just-installed package can be found
            import importlib
            importlib.invalidate_caches()
            import llama_cpp
        
        from llama_cpp import Llama
        
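        # n_gpu_layers=-1 asks llama.cpp to offload all layers to the GPU; this
        # only takes effect if llama-cpp-python was built with CUDA support
        # (the default PyPI wheel is CPU-only, in which case the flag is ignored).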
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,  # Context window size
            n_gpu_layers=-1  # Use all available GPU layers
        )
        
        # Generate text
        result.append("Generating text...")
        prompt = "Write a short poem about artificial intelligence."
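        # Note: this is a raw completion call. Since gemma-3-27b-it is
        # instruction-tuned, llm.create_chat_completion(
        #     messages=[{"role": "user", "content": prompt}])
        # would let llama-cpp-python apply the model's chat template (read from
        # the GGUF metadata, when present), which usually yields better-formed
        # answers; the plain call below is enough for a smoke test.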
        
        output = llm(
            prompt,
            max_tokens=100,
            temperature=0.7,
            top_p=0.95,
            echo=False
        )
        
        generated_text = output["choices"][0]["text"]
        result.append(f"Generated text: {generated_text}")
        result.append("Gemma 3 test successful!")
        
        return "\n".join(result)
    except Exception as e:
        return f"Error: {str(e)}\n\n{traceback.format_exc()}"

# Create Gradio interface
with gr.Blocks(title="StaffManager AI Assistant") as demo:
    gr.Markdown("# StaffManager AI Assistant")
    gr.Markdown("Testing Gemma 3 model for StaffManager application.")
    
    with gr.Tab("System Info"):
        with gr.Row():
            with gr.Column():
                info_button = gr.Button("Get System Info")
            with gr.Column():
                info_result = gr.Textbox(label="System Information", lines=10)
        
        info_button.click(
            fn=system_info,
            inputs=[],
            outputs=[info_result]
        )
    
    with gr.Tab("List Gemma 3 Files"):
        with gr.Row():
            with gr.Column():
                list_files_button = gr.Button("List Gemma 3 Files")
            with gr.Column():
                list_files_result = gr.Textbox(label="Files in Repository", lines=20)
        
        list_files_button.click(
            fn=list_gemma3_files,
            inputs=[],
            outputs=[list_files_result]
        )
    
    with gr.Tab("Gemma 3 Test"):
        with gr.Row():
            with gr.Column():
                gemma_button = gr.Button("Test Gemma 3")
            with gr.Column():
                gemma_result = gr.Textbox(label="Test Results", lines=20)
        
        gemma_button.click(
            fn=test_gemma3,
            inputs=[],
            outputs=[gemma_result]
        )
    
    with gr.Tab("About"):
        gr.Markdown("""
        ## About StaffManager AI Assistant
        
        This Space tests the Gemma 3 model for the StaffManager application.
        
        - **Gemma 3**: Google's 27B-parameter, instruction-tuned model in quantized (Q4_0) GGUF format for efficient inference
        
        This model requires authentication with a Hugging Face token that has been granted access to the model.
        """)

# Launch the app
demo.launch()