ashwml committed on
Commit
c55b851
·
1 Parent(s): cdafbc0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -30
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import gradio as gr
 
 
2
  # import pickle
3
  # import numpy as np
4
  # from fastapi import FastAPI,Response
@@ -38,52 +40,61 @@ import torch
38
 
39
  # f1_metric.set(f1)
40
 
41
- feature_extractor = ViTImageProcessor.from_pretrained("model")
42
 
43
- cap_model = VisionEncoderDecoderModel.from_pretrained("model")
44
 
45
- tokenizer = AutoTokenizer.from_pretrained("model")
46
- print("tokenizer --",tokenizer)
47
 
48
- device = "cuda" if torch.cuda.is_available() else "cpu"
49
 
50
- cap_model.to(device)
51
 
52
- def generate_caption(processor, model, image, tokenizer=None):
53
- # max_length = 16
54
- # num_beams = 4
55
- # gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
56
 
57
- # pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
58
- # pixel_values = pixel_values.to(device)
59
 
60
- # output_ids = model.generate(pixel_values, **gen_kwargs)
61
 
62
- # preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
63
- # preds = [pred.strip() for pred in preds]
64
- # return preds
65
- inputs = processor(images=image, return_tensors="pt").to(device)
66
- print("inputs",inputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- generated_ids = model.generate(pixel_values=inputs.pixel_values)
69
- print("generated_ids",generated_ids)
70
-
71
- if tokenizer is not None:
72
- print("tokenizer not null--",tokenizer)
73
- generated_caption = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
74
- else:
75
- print("tokenizer null--",tokenizer)
76
- generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
77
 
78
- return generated_caption
79
 
80
  def predict_event(image):
81
 
82
 
 
 
 
83
 
84
- caption_vitgpt = generate_caption(feature_extractor, cap_model, image, tokenizer)
 
85
 
86
- return caption_vitgpt
87
 
88
 
89
 
 
1
  import gradio as gr
2
+ from model.config import *
3
+ from PIL import Image
4
  # import pickle
5
  # import numpy as np
6
  # from fastapi import FastAPI,Response
 
40
 
41
  # f1_metric.set(f1)
42
 
 
43
 
 
44
 
 
 
45
 
46
+ model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained(encoder._name_or_path, decoder._name_or_path)
47
 
 
48
 
49
+ tokenizer = AutoTokenizer.from_pretrained(decoder._name_or_path)
50
+ tokenizer.pad_token = tokenizer.unk_token
 
 
51
 
 
 
52
 
 
53
 
54
+ # feature_extractor = ViTImageProcessor.from_pretrained("model")
55
+
56
+ # cap_model = VisionEncoderDecoderModel.from_pretrained("model")
57
+
58
+ # tokenizer = AutoTokenizer.from_pretrained("model")
59
+
60
+
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
69
+
70
+ # cap_model.to(device)
71
+
72
+ # def generate_caption(model, image, tokenizer=None):
73
+
74
 
75
+ # generated_ids = model.generate(pixel_values=inputs.pixel_values)
76
+ # print("generated_ids",generated_ids)
77
+
78
+ # if tokenizer is not None:
79
+ # print("tokenizer not null--",tokenizer)
80
+ # generated_caption = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
81
+ # else:
82
+ # print("tokenizer null--",tokenizer)
83
+ # generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
84
 
85
+ # return generated_caption
86
 
87
  def predict_event(image):
88
 
89
 
90
+ img = Image.open(image).convert("RGB")
91
+
92
+ generated_caption = tokenizer.decode(model.generate(feature_extractor(img, return_tensors="pt").pixel_values.to("cuda"))[0])
93
 
94
+ # caption_vitgpt = generate_caption(model, image)
95
+ #caption_vitgpt = generate_caption(feature_extractor, cap_model, image, tokenizer)
96
 
97
+ return '\033[96m' +generated_caption[:85]+ '\033[0m'
98
 
99
 
100