cpg716 committed (verified)
Commit 59fa1c3 · Parent: 83438fd

Update app.py

Files changed (1):
  1. app.py +98 -54
app.py CHANGED
@@ -1,76 +1,120 @@
Old version (lines 1–76; "-" marks removed lines):

 import gradio as gr
 import torch

-# Create a simple interface
-with gr.Blocks(title="Simple Qwen Test") as demo:
-    gr.Markdown("# Simple Qwen Test")
-    gr.Markdown("This is a minimal test to check if the Space is working.")
-
-    with gr.Tab("Basic Test"):
-        with gr.Row():
-            with gr.Column():
-                test_button = gr.Button("Run Basic Test")
-            with gr.Column():
-                test_result = gr.Textbox(label="Test Results", lines=10)

-        def basic_test():
-            try:
-                # Just print system info
-                import sys
-                import transformers
-
-                result = []
-                result.append(f"Python version: {sys.version}")
-                result.append(f"PyTorch version: {torch.__version__}")
-                result.append(f"Transformers version: {transformers.__version__}")
-                result.append("Basic test successful!")
-
-                return "\n".join(result)
-            except Exception as e:
-                import traceback
-                return f"Error: {str(e)}\n\n{traceback.format_exc()}"

-        test_button.click(
-            fn=basic_test,
-            inputs=[],
-            outputs=[test_result]
-        )

-# Launch the app
-def test_qwen_text():
     try:
-        # Use Qwen model with 4-bit quantization to reduce memory usage
-        model_id = "Qwen/Qwen2-7B-Instruct"
-
         result = []
-        result.append("Loading tokenizer...")
-        tokenizer = AutoTokenizer.from_pretrained(model_id)

-        result.append("Loading model with quantization...")
-        from transformers import BitsAndBytesConfig

-        quantization_config = BitsAndBytesConfig(
-            load_in_4bit=True,
-            bnb_4bit_compute_dtype=torch.float16,
-            bnb_4bit_quant_type="nf4"
         )

-        model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            quantization_config=quantization_config,
-            device_map="auto"
         )

         result.append("Generating text...")
-        prompt = "Write a short poem about AI."
-        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-        outputs = model.generate(**inputs, max_new_tokens=50)
-        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

         result.append(f"Generated text: {generated_text}")
-        result.append("Qwen text model test successful!")

         return "\n".join(result)
     except Exception as e:
         return f"Error: {str(e)}\n\n{traceback.format_exc()}"

 demo.launch()
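Note on the removed code: the old test_qwen_text referenced AutoTokenizer, AutoModelForCausalLM, and traceback without ever importing them, so it would have raised a NameError before reaching the model load. A minimal sketch of the imports that version was missing:

# Sketch: imports the removed test_qwen_text used but never declared
import traceback
from transformers import AutoTokenizer, AutoModelForCausalLM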
 
New version (lines 1–120; "+" marks added lines):

 import gradio as gr
 import torch
+import sys
+import traceback
+import os
+from huggingface_hub import hf_hub_download

+def system_info():
+    try:
+        import transformers

+        result = []
+        result.append(f"Python version: {sys.version}")
+        result.append(f"PyTorch version: {torch.__version__}")
+        result.append(f"Transformers version: {transformers.__version__}")

+        # Check GPU availability
+        if torch.cuda.is_available():
+            result.append(f"GPU available: {torch.cuda.get_device_name(0)}")
+            result.append(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
+        else:
+            result.append("No GPU available")
+
+        result.append("System info test successful!")
+
+        return "\n".join(result)
+    except Exception as e:
+        return f"Error: {str(e)}\n\n{traceback.format_exc()}"

+def test_gemma_gguf():
     try:
         result = []
+        result.append("Testing Gemma 3 GGUF model...")

+        # First, check if llama-cpp-python is installed
+        try:
+            import llama_cpp
+            result.append(f"llama_cpp version: {llama_cpp.__version__}")
+        except ImportError:
+            result.append("llama-cpp-python not installed. Installing now...")
+            import subprocess
+            subprocess.check_call([sys.executable, "-m", "pip", "install", "llama-cpp-python"])
+            import llama_cpp
+            result.append(f"llama_cpp version: {llama_cpp.__version__}")

+        # Download the model if not already downloaded
+        model_id = "google/gemma-3-27b-it-qat-q4_0-gguf"
+        model_filename = "gemma-3-27b-it-qat-q4_0.gguf"
+
+        result.append(f"Downloading {model_id} if not already present...")
+        model_path = hf_hub_download(
+            repo_id=model_id,
+            filename=model_filename,
+            resume_download=True
         )
+        result.append(f"Model downloaded to: {model_path}")
+
+        # Load the model
+        result.append("Loading model...")
+        from llama_cpp import Llama

+        llm = Llama(
+            model_path=model_path,
+            n_ctx=2048,  # Context window size
+            n_gpu_layers=-1  # Use all available GPU layers
         )

+        # Generate text
         result.append("Generating text...")
+        prompt = "Write a short poem about artificial intelligence."
+
+        output = llm(
+            prompt,
+            max_tokens=100,
+            temperature=0.7,
+            top_p=0.95,
+            echo=False
+        )

+        generated_text = output["choices"][0]["text"]
         result.append(f"Generated text: {generated_text}")
+        result.append("Gemma 3 GGUF test successful!")

         return "\n".join(result)
     except Exception as e:
         return f"Error: {str(e)}\n\n{traceback.format_exc()}"
+
+# Create Gradio interface
+with gr.Blocks(title="Gemma 3 GGUF Test") as demo:
+    gr.Markdown("# Gemma 3 GGUF Test")
+    gr.Markdown("Testing Gemma 3 model in GGUF format using llama-cpp-python.")
+
+    with gr.Tab("System Info"):
+        with gr.Row():
+            with gr.Column():
+                info_button = gr.Button("Get System Info")
+            with gr.Column():
+                info_result = gr.Textbox(label="System Information", lines=10)
+
+        info_button.click(
+            fn=system_info,
+            inputs=[],
+            outputs=[info_result]
+        )
+
+    with gr.Tab("Gemma 3 GGUF Test"):
+        with gr.Row():
+            with gr.Column():
+                gemma_button = gr.Button("Test Gemma 3 GGUF")
+            with gr.Column():
+                gemma_result = gr.Textbox(label="Test Results", lines=20)
+
+        gemma_button.click(
+            fn=test_gemma_gguf,
+            inputs=[],
+            outputs=[gemma_result]
+        )
+
+# Launch the app
 demo.launch()
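For reference, the added download-and-generate path can be smoke-tested outside Gradio. A minimal sketch, assuming llama-cpp-python and huggingface_hub are installed and the gated Gemma repo is accessible (an HF token may be required):

# Sketch: standalone check of the same GGUF flow (not part of the commit)
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_path = hf_hub_download(
    repo_id="google/gemma-3-27b-it-qat-q4_0-gguf",
    filename="gemma-3-27b-it-qat-q4_0.gguf",
)
llm = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=-1)
output = llm("Write a short poem about artificial intelligence.", max_tokens=100)
print(output["choices"][0]["text"])

A few design notes: pinning llama-cpp-python in requirements.txt is generally preferable on Spaces to the in-app pip install above, which repeats on every restart; recent huggingface_hub versions deprecate resume_download (downloads resume by default), so that flag can be dropped; and a 27B Q4_0 GGUF is roughly 15–17 GB, so the Space needs matching disk and memory.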