saakshigupta committed on
Commit a96c23c · verified · 1 Parent(s): 0b59358

Update app.py

Files changed (1)
  1. app.py +29 -46
app.py CHANGED
@@ -3,6 +3,8 @@ import torch
 from PIL import Image
 import os
 import gc
+from transformers import AutoProcessor, AutoModelForCausalLM
+from peft import PeftModel
 
 # Page config
 st.set_page_config(
@@ -23,7 +25,6 @@ def free_memory():
 
 # Helper function to check CUDA
 def init_device():
-    """Set the appropriate device and return it"""
     if torch.cuda.is_available():
         st.sidebar.success("✓ GPU available: Using CUDA")
         return "cuda"
@@ -36,31 +37,26 @@ device = init_device()
 
 @st.cache_resource
 def load_model():
-    """Load model using Unsloth, similar to your notebook code"""
+    """Load model without quantization"""
     try:
-        # Import libraries here to ensure they're loaded when needed
-        from peft import PeftModel
-        from unsloth import FastVisionModel
+        # Using your original base model
+        base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
 
-        st.info("Loading base model and tokenizer using Unsloth...")
+        # Load processor
+        processor = AutoProcessor.from_pretrained(base_model_id)
 
-        # Use the same model ID and loading approach that worked in your notebook
-        base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
-        model, tokenizer = FastVisionModel.from_pretrained(
+        # Load the model in half precision (float16) without 4-bit quantization
+        model = AutoModelForCausalLM.from_pretrained(
             base_model_id,
-            load_in_4bit=True,
-            torch_dtype=torch.float16,
+            device_map="auto",
+            torch_dtype=torch.float16  # Use float16 for memory efficiency
         )
 
-        # Set to inference mode
-        FastVisionModel.for_inference(model)
-
-        # Load the fine-tuned adapter
-        st.info("Loading adapter...")
+        # Load adapter
         adapter_id = "saakshigupta/deepfake-explainer-1"
         model = PeftModel.from_pretrained(model, adapter_id)
 
-        return model, tokenizer
+        return model, processor
 
     except Exception as e:
         st.error(f"Error loading model: {str(e)}")
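
Note: for reference, the new loading path in this hunk can be exercised on its own roughly as below. This is a minimal sketch using the same model and adapter IDs as the diff; it assumes recent transformers/peft versions and enough GPU memory for the 11B checkpoint in float16, and the exact Auto class that accepts this vision checkpoint can vary with the installed transformers version.

import torch
from transformers import AutoProcessor, AutoModelForCausalLM
from peft import PeftModel

# Same IDs as in the diff above.
base_model_id = "unsloth/llama-3.2-11b-vision-instruct-unsloth-bnb-4bit"
adapter_id = "saakshigupta/deepfake-explainer-1"

# The processor handles both image preprocessing and text tokenization.
processor = AutoProcessor.from_pretrained(base_model_id)

# Half-precision load spread across available devices; no 4-bit quantization here.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Attach the fine-tuned LoRA adapter on top of the base model.
model = PeftModel.from_pretrained(model, adapter_id)
model.eval()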
@@ -110,12 +106,12 @@ with st.sidebar:
 
     # Load model button
     if st.button("Load Model"):
-        with st.spinner("Loading model... this may take a minute."):
+        with st.spinner("Loading model... this may take several minutes"):
             try:
-                model, tokenizer = load_model()
-                if model is not None and tokenizer is not None:
+                model, processor = load_model()
+                if model is not None and processor is not None:
                     st.session_state['model'] = model
-                    st.session_state['tokenizer'] = tokenizer
+                    st.session_state['processor'] = processor
                     st.success("Model loaded successfully!")
                 else:
                     st.error("Failed to load model.")
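
Note: the loaded objects are stashed in st.session_state so later reruns of the script can reuse them without reloading. A hypothetical guard on the analysis side (not part of this diff) would look like:

import streamlit as st

# Hypothetical guard (not in this commit): only run analysis once both
# objects stored by the load button above are present in session state.
if 'model' not in st.session_state or 'processor' not in st.session_state:
    st.warning("Load the model from the sidebar before analyzing an image.")
else:
    model = st.session_state['model']
    processor = st.session_state['processor']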
@@ -143,33 +139,20 @@ if uploaded_file is not None:
         try:
             # Get components from session state
             model = st.session_state['model']
-            tokenizer = st.session_state['tokenizer']
+            processor = st.session_state['processor']
 
-            # Format the message for Unsloth - same as your notebook
-            messages = [
-                {"role": "user", "content": [
-                    {"type": "image"},
-                    {"type": "text", "text": custom_prompt}
-                ]}
-            ]
+            # Process the image using the processor
+            inputs = processor(text=custom_prompt, images=image, return_tensors="pt")
 
-            # Apply chat template
-            input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
-
-            # Process with image
-            inputs = tokenizer(
-                image,
-                input_text,
-                add_special_tokens=False,
-                return_tensors="pt",
-            ).to(model.device)
-
-            # Apply the cross-attention fix
+            # Fix cross-attention mask if needed
             fixed, inputs = fix_processor_outputs(inputs)
             if fixed:
                 st.info("Fixed cross-attention mask dimensions")
 
-            # Generate analysis
+            # Move to device
+            inputs = {k: v.to(model.device) for k, v in inputs.items() if isinstance(v, torch.Tensor)}
+
+            # Generate the analysis
             with torch.no_grad():
                 output_ids = model.generate(
                     **inputs,
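
Note: fix_processor_outputs is defined elsewhere in app.py and is not part of this hunk. As a purely hypothetical sketch, assuming the issue it works around is a cross_attention_mask coming back with one dimension fewer than the vision model expects, such a helper might look like:

def fix_processor_outputs(inputs):
    """Hypothetical sketch only -- the real helper lives elsewhere in app.py.

    Assumption: the processor sometimes returns cross_attention_mask without
    a trailing dimension, so we add it back and report whether a fix was applied.
    """
    mask = inputs.get("cross_attention_mask")
    if mask is not None and mask.dim() == 3:
        # (batch, seq_len, num_images) -> (batch, seq_len, num_images, 1)
        inputs["cross_attention_mask"] = mask.unsqueeze(-1)
        return True, inputs
    return False, inputs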
@@ -179,11 +162,11 @@ if uploaded_file is not None:
                 )
 
             # Decode the output
-            response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            response = processor.decode(output_ids[0], skip_special_tokens=True)
 
-            # Extract the model's response
-            if "assistant" in response:
-                result = response.split("assistant")[-1].strip()
+            # Extract the actual response (removing the prompt)
+            if custom_prompt in response:
+                result = response.split(custom_prompt)[-1].strip()
             else:
                 result = response
 
 
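Note: the prompt-stripping in the last hunk relies on custom_prompt appearing verbatim in the decoded text. A common alternative (an assumption, not what this commit does) is to decode only the newly generated tokens, using the names already defined in the diff above:

# Hypothetical alternative (not in this commit): skip the prompt tokens
# instead of splitting the decoded string on the prompt text.
prompt_len = inputs["input_ids"].shape[1]
result = processor.decode(output_ids[0][prompt_len:], skip_special_tokens=True).strip()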