File size: 11,090 Bytes
4ef094d e89b401 3e10424 38f364a d1f9e33 e85540a d1f9e33 38f364a 5bf0183 d1f9e33 e85540a 4ef094d e85540a 3e10424 e85540a e89b401 e85540a d1f9e33 38f364a d1f9e33 38f364a e85540a d1f9e33 e85540a d1f9e33 e85540a d1f9e33 e85540a 3e10424 e85540a d1f9e33 e85540a d1f9e33 3e10424 e85540a e89b401 90a2abe e85540a 90a2abe e85540a d1f9e33 3e10424 e85540a d1f9e33 90a2abe e85540a 90a2abe e85540a 90a2abe d1f9e33 90a2abe e85540a 90a2abe e85540a d1f9e33 e85540a 3e10424 d1f9e33 90a2abe e85540a 90a2abe e85540a 90a2abe d1f9e33 90a2abe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 |
import gradio as gr
import torch
from PIL import Image
import io
import json
import time
import os
import hashlib
import base64
from huggingface_hub import login
import traceback
import sys
import requests
# Print Python and library versions for debugging
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
import transformers
print(f"Transformers version: {transformers.__version__}")

# Read the Hub token from the environment. Only its presence is reported:
# even a short prefix of a secret should not end up in the logs.
token = os.environ.get("HUGGINGFACE_TOKEN", "")
if token:
    print("Token found in environment variables")
    # Explicitly login with the token. A failure here is logged but not fatal.
    try:
        login(token=token)
        print("Successfully logged in to Hugging Face Hub")
    except Exception as e:
        print(f"Error logging in: {e}")
else:
    # An empty token can never authenticate, so the login call is skipped
    # instead of producing a misleading "Error logging in" message.
    print("No token found in environment variables!")

# Global variables (populated lazily by load_llama4_model)
model = None
processor = None
# Initialize Llama 4 Scout model
def load_llama4_model():
    """Load the vision-language model and its processor once, then reuse them.

    The first call tries to load ``meta-llama/Llama-4-Scout-17B-16E-Instruct``
    with 4-bit NF4 quantization. If anything in that attempt raises, the
    function falls back to ``llava-hf/llava-1.5-7b-hf``. The loaded pair is
    stored in the module-level ``model`` / ``processor`` globals so later calls
    return immediately.

    Returns:
        A ``(model, processor)`` tuple.

    Raises:
        Exception: whatever the fallback load raised, when both the primary
            and the fallback load fail.
    """
    global model, processor

    # Already loaded: nothing to do.
    if model is not None and processor is not None:
        return model, processor

    # An empty string is not a usable token; pass None so the library can use
    # whatever credentials it has stored instead.
    auth_token = token or None

    try:
        print("Loading Llama 4 Scout model...")
        # Import the correct classes for Llama 4
        from transformers import (
            AutoProcessor,
            BitsAndBytesConfig,
            Llama4ForConditionalGeneration,
        )

        model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
        loaded_processor = AutoProcessor.from_pretrained(
            model_id,
            token=auth_token,
        )
        # Use 4-bit quantization to reduce memory usage. The compute dtype is
        # kept identical to torch_dtype below so activations are not cast back
        # and forth between float16 and bfloat16.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4",
        )
        loaded_model = Llama4ForConditionalGeneration.from_pretrained(
            model_id,
            token=auth_token,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            quantization_config=quantization_config,
        )
        print("Llama 4 Scout model loaded successfully!")
    except Exception as e:
        print(f"Error loading Llama 4 Scout model: {e}")
        print(traceback.format_exc())
        # Fall back to LLaVA if Llama 4 fails
        try:
            print("Falling back to LLaVA...")
            from transformers import AutoProcessor, AutoModelForVision2Seq

            loaded_processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
            loaded_model = AutoModelForVision2Seq.from_pretrained(
                "llava-hf/llava-1.5-7b-hf",
                device_map="auto",
            )
            print("LLaVA model loaded as fallback")
        except Exception as fallback_error:
            print(f"Even fallback failed: {fallback_error}")
            raise

    # Publish both objects together, only after a complete successful load, so
    # the globals never hold a processor without its matching model.
    model, processor = loaded_model, loaded_processor
    return model, processor
# Function to convert PIL Image to base64
def image_to_base64(img):
    """Serialize *img* as PNG and return it as a ``data:image/png;base64,...`` URL.

    *img* only needs a ``save(file_obj, format=...)`` method, as PIL images have.
    """
    with io.BytesIO() as png_buffer:
        img.save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue())
    return "data:image/png;base64," + encoded.decode()
# Simple caching mechanism
from collections import OrderedDict


class _BoundedCache(OrderedDict):
    """Dict-like result cache that never holds more than ``max_entries`` items.

    Supports the same ``key in cache`` / ``cache[key]`` / ``cache[key] = value``
    operations as a plain dict. When an insert pushes the size over the limit,
    the entry that was written least recently is dropped, so a long-running
    app cannot grow the cache without bound.
    """

    def __init__(self, max_entries=256):
        self.max_entries = max_entries
        super().__init__()

    def __setitem__(self, key, value):
        # Re-writing an existing key counts as a fresh write.
        if key in self:
            self.move_to_end(key)
        super().__setitem__(key, value)
        # Evict the oldest entry once the limit is exceeded.
        if len(self) > self.max_entries:
            del self[next(iter(self))]


cache = _BoundedCache()
def compute_image_hash(image):
    """Return an MD5 hex digest of *image*, for use as a cache key.

    The image is first resized to 100x100 (LANCZOS) and encoded as PNG; the
    digest is computed over those PNG bytes, so it does not depend on the
    original pixel dimensions.
    """
    thumbnail = image.resize((100, 100), Image.LANCZOS)

    # Serialize the thumbnail into an in-memory PNG.
    png_buffer = io.BytesIO()
    thumbnail.save(png_buffer, format='PNG')

    # Hash the encoded bytes.
    digest = hashlib.md5()
    digest.update(png_buffer.getvalue())
    return digest.hexdigest()
def verify_document(img, doc_type, verification_info):
    """Ask the vision-language model to verify a document image.

    Args:
        img: The uploaded document image (the UI passes a PIL image), or
            ``None`` when nothing was uploaded.
        doc_type: Document category inserted into the prompt.
        verification_info: Text describing the fields to extract/verify; it is
            inserted into the prompt verbatim.

    Returns:
        The model's answer as text. A previously computed answer for the same
        image, document type and verification info is returned prefixed with
        ``[CACHED]``. On failure during inference an error message string is
        returned instead of raising.
    """
    if img is None:
        return "Please upload an image"

    # The answer depends on the image, the document type AND the requested
    # verification info, so all three must be part of the cache key. The info
    # text is hashed to keep the key short.
    image_hash = compute_image_hash(img)
    info_hash = hashlib.md5(str(verification_info).encode("utf-8")).hexdigest()
    cache_key = f"verify_{image_hash}_{doc_type}_{info_hash}"

    # Check cache
    if cache_key in cache:
        return f"[CACHED] {cache[cache_key]}"

    try:
        # Load model and processor
        model, processor = load_llama4_model()

        # Create prompt
        prompt = (
            f"This is a {doc_type} document.\n"
            f"Verify if it's authentic and extract the following information: {verification_info}\n"
            "Provide your analysis in a structured format."
        )

        # Process with model
        start_time = time.time()
        print(f"Starting document verification at {start_time}")

        # The chat template receives the image as a base64 data URL.
        img_url = image_to_base64(img)
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "url": img_url},
                    {"type": "text", "text": prompt},
                ],
            },
        ]

        # Process input using the chat template
        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        # Generate output
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
        )

        # Keep only the newly generated tokens (drop the echoed prompt) and
        # strip special tokens so markers such as end-of-turn do not show up
        # in the text presented to the user.
        prompt_length = inputs["input_ids"].shape[-1]
        result = processor.batch_decode(
            outputs[:, prompt_length:],
            skip_special_tokens=True,
        )[0]

        end_time = time.time()
        print(f"Completed document verification in {end_time - start_time:.2f} seconds")

        # Save to cache (only successful results are stored)
        cache[cache_key] = result
        return result
    except Exception as e:
        error_details = traceback.format_exc()
        print(f"Error in verify_document: {e}")
        print(error_details)
        return f"Error processing document: {str(e)}\n\nPlease try again with a different image or try later."
def check_workplace(img, industry):
    """Ask the vision-language model for a workplace compliance assessment.

    Args:
        img: The uploaded workplace image (the UI passes a PIL image), or
            ``None`` when nothing was uploaded.
        industry: Industry name inserted into the prompt.

    Returns:
        The model's assessment as text. A previously computed answer for the
        same image and industry is returned prefixed with ``[CACHED]``. On
        failure during inference an error message string is returned instead
        of raising.
    """
    if img is None:
        return "Please upload an image"

    # Compute image hash for caching
    image_hash = compute_image_hash(img)
    cache_key = f"workplace_{image_hash}_{industry}"

    # Check cache
    if cache_key in cache:
        return f"[CACHED] {cache[cache_key]}"

    try:
        # Load model and processor
        model, processor = load_llama4_model()

        # Create prompt
        prompt = (
            f"This is a workplace in the {industry} industry.\n"
            "Identify any safety or compliance issues visible in this image.\n"
            "Focus on:\n"
            "1. Safety hazards\n"
            "2. Required signage\n"
            "3. Proper equipment usage\n"
            "4. Workspace organization\n"
            "5. Compliance with regulations\n"
            "Format your response as a detailed assessment with:\n"
            "- Issues found (if any)\n"
            "- Severity level for each issue\n"
            "- Recommendations for correction"
        )

        # Process with model
        start_time = time.time()
        print(f"Starting workplace compliance check at {start_time}")

        # The chat template receives the image as a base64 data URL.
        img_url = image_to_base64(img)
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "url": img_url},
                    {"type": "text", "text": prompt},
                ],
            },
        ]

        # Process input using the chat template
        inputs = processor.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        # Generate output
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
        )

        # Keep only the newly generated tokens (drop the echoed prompt) and
        # strip special tokens so markers such as end-of-turn do not show up
        # in the text presented to the user.
        prompt_length = inputs["input_ids"].shape[-1]
        result = processor.batch_decode(
            outputs[:, prompt_length:],
            skip_special_tokens=True,
        )[0]

        end_time = time.time()
        print(f"Completed workplace compliance check in {end_time - start_time:.2f} seconds")

        # Save to cache (only successful results are stored)
        cache[cache_key] = result
        return result
    except Exception as e:
        error_details = traceback.format_exc()
        print(f"Error in check_workplace: {e}")
        print(error_details)
        return f"Error processing workplace image: {str(e)}\n\nPlease try again with a different image or try later."
# Build the Gradio UI: two tool tabs plus an "About" tab.
with gr.Blocks(title="StaffManager AI Assistant") as demo:
    gr.Markdown("# StaffManager AI Assistant")
    gr.Markdown("This Space provides AI capabilities for StaffManager using Llama 4 Scout.")

    # Tab 1: document verification (image + type + requested fields -> text).
    with gr.Tab("Document Verification"):
        with gr.Row():
            with gr.Column():
                doc_image = gr.Image(label="Upload Document", type="pil")
                doc_type = gr.Dropdown(
                    choices=["identification", "tax", "employment", "policy"],
                    value="identification",
                    label="Document Type",
                )
                verification_info = gr.Textbox(
                    value='{"name": "John Doe", "id_number": "ABC123456"}',
                    label="Verification Data (JSON)",
                )
                verify_button = gr.Button("Verify Document")
            with gr.Column():
                doc_result = gr.Textbox(lines=10, label="Verification Result")
        verify_button.click(
            verify_document,
            inputs=[doc_image, doc_type, verification_info],
            outputs=[doc_result],
        )

    # Tab 2: workplace compliance (image + industry -> text).
    with gr.Tab("Workplace Compliance"):
        with gr.Row():
            with gr.Column():
                workplace_image = gr.Image(label="Upload Workplace Image", type="pil")
                industry_type = gr.Dropdown(
                    choices=["retail", "restaurant", "healthcare", "manufacturing"],
                    value="retail",
                    label="Industry",
                )
                check_button = gr.Button("Check Compliance")
            with gr.Column():
                compliance_result = gr.Textbox(lines=10, label="Compliance Assessment")
        check_button.click(
            check_workplace,
            inputs=[workplace_image, industry_type],
            outputs=[compliance_result],
        )

    # Tab 3: static description of the Space.
    with gr.Tab("About"):
        gr.Markdown(
            "\n"
            "## About StaffManager AI Assistant\n"
            "This Space uses the Llama 4 Scout model to provide AI capabilities for StaffManager:\n"
            "- **Document Verification**: Verify and extract information from documents\n"
            "- **Workplace Compliance**: Identify safety and compliance issues in workplace images\n"
            "The model is loaded on demand and results are cached for better performance.\n"
            "### Model Information\n"
            "- Model: meta-llama/Llama-4-Scout-17B-16E-Instruct\n"
            "- Type: Multimodal (image + text)\n"
            "- Size: 17B parameters\n"
        )

# Start serving the interface.
demo.launch()