Spaces:

cpg716
/

staffmanager-llama4-scout

Running

cpg716 committed on Apr 9

Commit

83438fd

verified ·

1 Parent(s): ac87ee6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -37,4 +37,40 @@ with gr.Blocks(title="Simple Qwen Test") as demo:
         )
 # Launch the app
 demo.launch()

         )
 # Launch the app
+def test_qwen_text():
+    """Smoke-test text generation with the Qwen2-7B-Instruct model.
+
+    Loads the tokenizer and a 4-bit quantized model for
+    ``Qwen/Qwen2-7B-Instruct``, generates up to 50 new tokens for a fixed
+    prompt, and records a progress message before each step.
+
+    Returns:
+        On success, the progress messages, the generated text and a final
+        success line, joined with newlines. If any ``Exception`` is raised,
+        a string of the form ``"Error: <message>"`` followed by the
+        formatted traceback; the progress messages collected so far are
+        not part of that error string.
+
+    NOTE(review): ``AutoTokenizer``, ``AutoModelForCausalLM``, ``torch`` and
+    ``traceback`` are not imported inside this function; presumably they are
+    imported at the top of app.py -- confirm.
+    """
+    try:
+        # Use Qwen model with 4-bit quantization to reduce memory usage
+        model_id = "Qwen/Qwen2-7B-Instruct"
+        # Progress log; joined with newlines and returned on success.
+        result = []
+        result.append("Loading tokenizer...")
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        result.append("Loading model with quantization...")
+        # Imported locally; in the visible code only this function uses it.
+        from transformers import BitsAndBytesConfig
+        # 4-bit NF4 quantization with float16 as the compute dtype.
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_quant_type="nf4"
+        )
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            quantization_config=quantization_config,
+            device_map="auto"
+        )
+        result.append("Generating text...")
+        prompt = "Write a short poem about AI."
+        # Tokenize the prompt as PyTorch tensors and move them to the
+        # device the model reports via model.device.
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        # Cap the generation at 50 newly generated tokens.
+        outputs = model.generate(**inputs, max_new_tokens=50)
+        # Decode the first returned sequence, dropping special tokens.
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        result.append(f"Generated text: {generated_text}")
+        result.append("Qwen text model test successful!")
+        return "\n".join(result)
+    except Exception as e:
+        # Report any failure as text (message plus traceback) instead of
+        # raising, so the caller always receives a string.
+        return f"Error: {str(e)}\n\n{traceback.format_exc()}"
 demo.launch()