ashwml committed on
Commit 83ed194 · 1 Parent(s): af78db7

Update app.py

Files changed (1)
  1. app.py +2 -13
app.py CHANGED
@@ -61,10 +61,6 @@ tokenizer.pad_token = tokenizer.unk_token
 
 feature_extractor = ViTImageProcessor.from_pretrained(encoder_name_or_path)
 
-# cap_model = VisionEncoderDecoderModel.from_pretrained("model")
-
-# tokenizer = AutoTokenizer.from_pretrained("model")
-
 
 
 
@@ -98,16 +94,8 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 
 def predict_event(image):
 
-
-    # img = Image.open(image).convert("RGB")
-
     generated_caption = tokenizer.decode(model.generate(feature_extractor(image, return_tensors="pt").pixel_values.to(device))[0])
 
-    # caption_vitgpt = generate_caption(model, image)
-    #caption_vitgpt = generate_caption(feature_extractor, cap_model, image, tokenizer)
-    # preds = [pred.strip() for pred in generated_caption]
-    # return preds
-
     return '\033[96m' +generated_caption+ '\033[0m'
 
 
@@ -131,9 +119,10 @@ iface = gr.Interface(predict_event,
                      # gr.Image(type="pil"),
                      outputs=["text"] )
 
-
 
 iface.launch()
+
+
 # app = gr.mount_gradio_app(app, iface, path="/")
 
 # iface.launch(server_name = "0.0.0.0", server_port = 8001,share=True)
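What this commit leaves behind in app.py is a single caption path: a ViTImageProcessor turns the uploaded image into pixel_values, model.generate produces token ids, and tokenizer.decode turns them back into text (which app.py then wraps in ANSI colour escapes). Below is a minimal, self-contained sketch of that path. It assumes the public nlpconnect/vit-gpt2-image-captioning checkpoint as a stand-in for the repository's own encoder/decoder names, which are defined earlier in app.py and not shown in this diff, and it returns the bare caption rather than the ANSI-wrapped string.

import torch
from PIL import Image
from transformers import AutoTokenizer, ViTImageProcessor, VisionEncoderDecoderModel

# Assumed placeholder checkpoint; app.py uses its own encoder/decoder names.
model_name = "nlpconnect/vit-gpt2-image-captioning"
device = "cuda" if torch.cuda.is_available() else "cpu"

feature_extractor = ViTImageProcessor.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = VisionEncoderDecoderModel.from_pretrained(model_name).to(device)

def predict_event(image: Image.Image) -> str:
    # Same call chain as app.py: image -> pixel_values -> generate -> decode.
    pixel_values = feature_extractor(image, return_tensors="pt").pixel_values.to(device)
    output_ids = model.generate(pixel_values)
    # skip_special_tokens drops markers such as the end-of-sequence token from the text.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()

Passing skip_special_tokens=True avoids the raw special-token markers that a plain tokenizer.decode, as used in app.py, would otherwise leave in the Gradio text box.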
 
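The trailing comments kept by this commit point at two alternative ways to serve the same Interface: launching it directly (what app.py does), or mounting it on a FastAPI app via gr.mount_gradio_app and serving on 0.0.0.0:8001. A hedged sketch of both options follows; it assumes the gr.Image(type="pil") input that the commented-out line suggests and a FastAPI object named app, neither of which appears live in the diff.

import gradio as gr
from fastapi import FastAPI
import uvicorn

def predict_event(image):
    # Placeholder; in app.py this is the ViT-GPT2 captioning function sketched above.
    return "caption"

# Same wiring as app.py's iface, with the commented-out PIL image input restored
# (an assumption about the intended input type).
iface = gr.Interface(
    fn=predict_event,
    inputs=gr.Image(type="pil"),
    outputs="text",
)

# Option 1: standalone Gradio server, as app.py does after this commit.
# iface.launch()

# Option 2: mount the interface on an existing FastAPI app, as the commented-out lines hint.
app = FastAPI()
app = gr.mount_gradio_app(app, iface, path="/")

if __name__ == "__main__":
    # Mirrors the commented-out server_name/server_port values.
    uvicorn.run(app, host="0.0.0.0", port=8001)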