cpg716 committed
Commit d1f9e33 · verified · 1 Parent(s): 3e10424

Update app.py

Files changed (1):
  1. app.py +141 -88
app.py CHANGED
@@ -9,6 +9,14 @@ import os
 import hashlib
 import base64
 from huggingface_hub import login
+import traceback
+import sys
+
+# Print Python and library versions for debugging
+print(f"Python version: {sys.version}")
+print(f"PyTorch version: {torch.__version__}")
+import transformers
+print(f"Transformers version: {transformers.__version__}")
 
 # Print token information (first few characters only for security)
 token = os.environ.get("HUGGINGFACE_TOKEN", "")
@@ -24,84 +32,103 @@ try:
 except Exception as e:
     print(f"Error logging in: {e}")
 
-# Simple test to verify token works
-try:
-    from huggingface_hub import whoami
-    user_info = whoami()
-    print(f"Authenticated as: {user_info}")
-except Exception as e:
-    print(f"Error checking authentication: {e}")
-
-# Global variables for pipelines
-text_pipeline = None
-image_text_pipeline = None
+# Global variables
+llama_pipeline = None
 
-# Initialize pipelines
-def load_pipelines():
-    global text_pipeline, image_text_pipeline
+# Initialize Llama 4 Scout pipeline
+def load_llama_pipeline():
+    global llama_pipeline
 
-    if text_pipeline is None or image_text_pipeline is None:
+    if llama_pipeline is None:
         try:
-            print("Loading Llama 4 Scout pipelines...")
+            print("Loading Llama 4 Scout pipeline...")
+
+            # Use 4-bit quantization to reduce memory usage
+            from transformers import BitsAndBytesConfig
 
-            # Load text generation pipeline
-            text_pipeline = pipeline(
-                "text-generation",
-                model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
-                device_map="auto",
-                torch_dtype=torch.bfloat16,
-                token=token
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=torch.float16,
+                bnb_4bit_quant_type="nf4"
             )
-            print("Text pipeline loaded successfully!")
 
-            # Load image-to-text pipeline
-            image_text_pipeline = pipeline(
+            # Try different pipeline types for Llama 4 Scout
+            pipeline_types = [
+                "image-to-text",
                 "image-text-to-text",
-                model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
-                device_map="auto",
-                torch_dtype=torch.bfloat16,
-                token=token
-            )
-            print("Image-text pipeline loaded successfully!")
+                "visual-question-answering"
+            ]
 
-        except Exception as e:
-            print(f"Error loading pipelines: {e}")
-            # Try loading just the text pipeline if image-text fails
-            try:
-                if text_pipeline is None:
-                    text_pipeline = pipeline(
-                        "text-generation",
+            for pipeline_type in pipeline_types:
+                try:
+                    print(f"Trying pipeline type: {pipeline_type}")
+                    llama_pipeline = pipeline(
+                        pipeline_type,
                         model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
                         device_map="auto",
-                        torch_dtype=torch.bfloat16,
+                        model_kwargs={"quantization_config": quantization_config},
                         token=token
                     )
-                    print("Text pipeline loaded successfully!")
-            except Exception as text_error:
-                print(f"Error loading text pipeline: {text_error}")
+                    print(f"Successfully loaded Llama 4 Scout with pipeline type: {pipeline_type}")
+                    break
+                except Exception as pipeline_error:
+                    print(f"Failed to load with pipeline type {pipeline_type}: {pipeline_error}")
+
+            if llama_pipeline is None:
+                # If all pipeline types fail, try loading with AutoModel classes
+                print("Trying to load with AutoModel classes...")
+                from transformers import AutoProcessor, AutoModelForVision2Seq
+
+                processor = AutoProcessor.from_pretrained(
+                    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+                    token=token
+                )
+
+                model = AutoModelForVision2Seq.from_pretrained(
+                    "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+                    token=token,
+                    quantization_config=quantization_config,
+                    device_map="auto"
+                )
+
+                # Create a custom pipeline function
+                def custom_pipeline(image, prompt, max_new_tokens=300):
+                    inputs = processor(text=prompt, images=image, return_tensors="pt").to(model.device)
+                    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
+                    return processor.decode(outputs[0], skip_special_tokens=True)
 
-        # Fall back to LLaVA for image-text if needed
+                llama_pipeline = custom_pipeline
+                print("Successfully created custom Llama 4 Scout pipeline")
+
+            # If still None, fall back to LLaVA
+            if llama_pipeline is None:
+                print("All Llama 4 Scout loading attempts failed, falling back to LLaVA...")
+                llama_pipeline = pipeline(
+                    "image-to-text",
+                    model="llava-hf/llava-1.5-7b-hf",
+                    device_map="auto",
+                    model_kwargs={"quantization_config": quantization_config}
+                )
+                print("LLaVA pipeline loaded as fallback")
+
+        except Exception as e:
+            print(f"Error loading pipeline: {e}")
+            print(traceback.format_exc())
+
+            # Final fallback to LLaVA if everything else fails
             try:
-                if image_text_pipeline is None:
-                    print("Falling back to LLaVA for image-text...")
-                    image_text_pipeline = pipeline(
-                        "image-to-text",
-                        model="llava-hf/llava-1.5-13b-hf",
-                        device_map="auto"
-                    )
-                    print("LLaVA image-text pipeline loaded as fallback!")
-            except Exception as image_error:
-                print(f"Error loading fallback image pipeline: {image_error}")
+                print("Falling back to LLaVA after error...")
+                llama_pipeline = pipeline(
+                    "image-to-text",
+                    model="llava-hf/llava-1.5-7b-hf",
+                    device_map="auto"
+                )
+                print("LLaVA pipeline loaded as fallback after error")
+            except Exception as fallback_error:
+                print(f"Even fallback failed: {fallback_error}")
                 raise
 
-    return text_pipeline, image_text_pipeline
-
-# Function to convert PIL Image to base64
-def image_to_base64(img):
-    buffered = io.BytesIO()
-    img.save(buffered, format="PNG")
-    img_str = base64.b64encode(buffered.getvalue()).decode()
-    return img_str
+    return llama_pipeline
 
 # Simple caching mechanism
 cache = {}
@@ -133,30 +160,43 @@ def verify_document(img, doc_type, verification_info):
         return f"[CACHED] {cache[cache_key]}"
 
     try:
-        # Load pipelines
-        _, image_text_pipeline = load_pipelines()
+        # Load pipeline
+        pipeline = load_llama_pipeline()
 
         # Create prompt
         prompt = f"""This is a {doc_type} document.
 Verify if it's authentic and extract the following information: {verification_info}
 Provide your analysis in a structured format."""
 
-        # Process with pipeline
-        messages = [
-            {"role": "user", "content": [
-                {"type": "text", "text": prompt},
-                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_to_base64(img)}"}}
-            ]}
-        ]
+        # Process with pipeline (with timeout)
+        start_time = time.time()
+        print(f"Starting document verification at {start_time}")
+
+        # Handle different pipeline types
+        if callable(pipeline) and not hasattr(pipeline, 'task'):  # Custom pipeline
+            result_text = pipeline(image=img, prompt=prompt, max_new_tokens=300)
+        elif hasattr(pipeline, 'task') and pipeline.task == "visual-question-answering":
+            result = pipeline(image=img, question=prompt, max_new_tokens=300)
+            result_text = result[0]["answer"] if isinstance(result, list) else result["answer"]
+        else:  # Standard pipeline
+            result = pipeline(image=img, prompt=prompt, max_new_tokens=300)
+            if isinstance(result, list):
+                result_text = result[0].get('generated_text', str(result))
+            else:
+                result_text = str(result)
 
-        result = image_text_pipeline(messages, max_new_tokens=500)[0]["generated_text"]
+        end_time = time.time()
+        print(f"Completed document verification in {end_time - start_time:.2f} seconds")
 
         # Save to cache
-        cache[cache_key] = result
+        cache[cache_key] = result_text
 
-        return result
+        return result_text
     except Exception as e:
-        return f"Error: {str(e)}"
+        error_details = traceback.format_exc()
+        print(f"Error in verify_document: {e}")
+        print(error_details)
+        return f"Error processing document: {str(e)}\n\nPlease try again with a different image or try later."
 
 def check_workplace(img, industry):
     """Check workplace compliance using Llama 4 Scout"""
@@ -172,8 +212,8 @@ def check_workplace(img, industry):
         return f"[CACHED] {cache[cache_key]}"
 
     try:
-        # Load pipelines
-        _, image_text_pipeline = load_pipelines()
+        # Load pipeline
+        pipeline = load_llama_pipeline()
 
         # Create prompt
         prompt = f"""This is a workplace in the {industry} industry.
@@ -190,22 +230,35 @@ Format your response as a detailed assessment with:
 - Severity level for each issue
 - Recommendations for correction"""
 
-        # Process with pipeline
-        messages = [
-            {"role": "user", "content": [
-                {"type": "text", "text": prompt},
-                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_to_base64(img)}"}}
-            ]}
-        ]
+        # Process with pipeline (with timeout)
+        start_time = time.time()
+        print(f"Starting workplace compliance check at {start_time}")
+
+        # Handle different pipeline types
+        if callable(pipeline) and not hasattr(pipeline, 'task'):  # Custom pipeline
+            result_text = pipeline(image=img, prompt=prompt, max_new_tokens=300)
+        elif hasattr(pipeline, 'task') and pipeline.task == "visual-question-answering":
+            result = pipeline(image=img, question=prompt, max_new_tokens=300)
+            result_text = result[0]["answer"] if isinstance(result, list) else result["answer"]
+        else:  # Standard pipeline
+            result = pipeline(image=img, prompt=prompt, max_new_tokens=300)
+            if isinstance(result, list):
+                result_text = result[0].get('generated_text', str(result))
+            else:
+                result_text = str(result)
 
-        result = image_text_pipeline(messages, max_new_tokens=800)[0]["generated_text"]
+        end_time = time.time()
+        print(f"Completed workplace compliance check in {end_time - start_time:.2f} seconds")
 
         # Save to cache
-        cache[cache_key] = result
+        cache[cache_key] = result_text
 
-        return result
+        return result_text
     except Exception as e:
-        return f"Error: {str(e)}"
+        error_details = traceback.format_exc()
+        print(f"Error in check_workplace: {e}")
+        print(error_details)
+        return f"Error processing workplace image: {str(e)}\n\nPlease try again with a different image or try later."
 
 # Create Gradio interface
 with gr.Blocks(title="StaffManager AI Assistant") as demo:
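Both handlers consult cache_key before calling the model, but the key derivation lies outside the hunks above. A minimal sketch of how such a key could be built from the hashlib import at the top of app.py (the helper name make_cache_key and the PNG-bytes recipe are assumptions, not code from this commit):

import hashlib
import io

from PIL import Image

def make_cache_key(img: Image.Image, *parts: str) -> str:
    # Hypothetical helper: hash the image bytes plus the request parameters
    # into a stable key. app.py computes cache_key outside the diff shown,
    # so the exact recipe here is an assumption.
    buffered = io.BytesIO()
    img.save(buffered, format="PNG")
    digest = hashlib.sha256(buffered.getvalue())
    for part in parts:
        digest.update(part.encode("utf-8"))
    return digest.hexdigest()

# e.g. in verify_document(img, doc_type, verification_info):
# cache_key = make_cache_key(img, "verify", doc_type, verification_info)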
 
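The type-dispatch block added to verify_document is duplicated verbatim in check_workplace. Factored out on its own it would look like the sketch below (run_vision_pipeline is a hypothetical name, not part of the commit; the branches mirror the committed logic):

def run_vision_pipeline(pipe, img, prompt, max_new_tokens=300):
    # Custom AutoModel-based pipeline: a plain function returning decoded text.
    if callable(pipe) and not hasattr(pipe, "task"):
        return pipe(image=img, prompt=prompt, max_new_tokens=max_new_tokens)
    # Visual-question-answering pipelines take a question and return answers.
    if getattr(pipe, "task", None) == "visual-question-answering":
        result = pipe(image=img, question=prompt, max_new_tokens=max_new_tokens)
        return result[0]["answer"] if isinstance(result, list) else result["answer"]
    # Standard image-to-text / image-text-to-text pipelines.
    result = pipe(image=img, prompt=prompt, max_new_tokens=max_new_tokens)
    if isinstance(result, list):
        return result[0].get("generated_text", str(result))
    return str(result)

Hoisting this into a shared helper would keep the two Gradio handlers in sync if the dispatch rules change again.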