prithivMLmods committed on
Commit
4d0dad8
·
verified ·
1 Parent(s): 1b66eea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -14
app.py CHANGED
@@ -90,38 +90,47 @@ def generate(
90
  except Exception as e:
91
  raise ValueError("Unsupported media type. Please upload an image.")
92
 
 
 
 
 
93
  messages = [
94
  {
95
  "role": "user",
96
  "content": [
97
- {
98
- "type": media_type,
99
- media_type: media_path,
100
- },
101
- {"type": "text", "text": message},
102
  ],
103
  }
104
  ]
105
 
106
- text = multimodal_processor.apply_chat_template(
107
- messages, tokenize=False, add_generation_prompt=True
108
- )
109
- image_inputs = multimodal_processor(images=[media_path], return_tensors="pt").to("cuda")
110
  inputs = multimodal_processor(
111
- text=[text],
112
- images=image_inputs,
113
- padding=True,
114
  return_tensors="pt",
 
115
  ).to("cuda")
116
 
 
117
  streamer = TextIteratorStreamer(
118
- multimodal_processor, skip_prompt=True, **{"skip_special_tokens": True}
 
 
 
 
 
 
 
 
 
 
119
  )
120
- generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
121
 
 
122
  thread = Thread(target=multimodal_model.generate, kwargs=generation_kwargs)
123
  thread.start()
124
 
 
125
  buffer = ""
126
  for new_text in streamer:
127
  buffer += new_text
 
90
  except Exception as e:
91
  raise ValueError("Unsupported media type. Please upload an image.")
92
 
93
+ # Load the image
94
+ image = Image.open(media_path).convert("RGB")
95
+
96
+ # Prepare the input for the multimodal model
97
  messages = [
98
  {
99
  "role": "user",
100
  "content": [
101
+ {"image": media_path}, # Pass the image path
102
+ {"text": message}, # Pass the text prompt
 
 
 
103
  ],
104
  }
105
  ]
106
 
107
+ # Process the input
 
 
 
108
  inputs = multimodal_processor(
109
+ messages,
 
 
110
  return_tensors="pt",
111
+ padding=True,
112
  ).to("cuda")
113
 
114
+ # Stream the output
115
  streamer = TextIteratorStreamer(
116
+ multimodal_processor, skip_prompt=True, skip_special_tokens=True
117
+ )
118
+ generation_kwargs = dict(
119
+ inputs,
120
+ streamer=streamer,
121
+ max_new_tokens=max_new_tokens,
122
+ do_sample=True,
123
+ temperature=temperature,
124
+ top_p=top_p,
125
+ top_k=top_k,
126
+ repetition_penalty=repetition_penalty,
127
  )
 
128
 
129
+ # Start the generation in a separate thread
130
  thread = Thread(target=multimodal_model.generate, kwargs=generation_kwargs)
131
  thread.start()
132
 
133
+ # Stream the output token by token
134
  buffer = ""
135
  for new_text in streamer:
136
  buffer += new_text