import gradio as gr
# import pickle
# import numpy as np
# from fastapi import FastAPI,Response
# from sklearn.metrics import accuracy_score, f1_score
# import prometheus_client as prom
# import pandas as pd
# import uvicorn
from transformers import VisionEncoderDecoderModel,pipeline, ViTImageProcessor, AutoTokenizer
import torch


#model

# loaded_model = pickle.load(open(save_file_name, 'rb'))

# app=FastAPI()


# test_data=pd.read_csv("test.csv")


# f1_metric = prom.Gauge('death_f1_score', 'F1 score for test samples')

# Function for updating metrics
# def update_metrics():
#     test = test_data.sample(20)
#     X = test.iloc[:, :-1].values
#     y = test['DEATH_EVENT'].values
    
#     # test_text = test['Text'].values
#     test_pred = loaded_model.predict(X)
#     #pred_labels = [int(pred['label'].split("_")[1]) for pred in test_pred]

#     f1 = f1_score( y , test_pred).round(3)

#     #f1 = f1_score(test['labels'], pred_labels).round(3)

#     f1_metric.set(f1)

feature_extractor = ViTImageProcessor.from_pretrained("model")
print("feature_extractor--",feature_extractor)
cap_model = VisionEncoderDecoderModel.from_pretrained("model")
print("cap_model--",cap_model)
tokenizer = AutoTokenizer.from_pretrained("model")
print("tokenizer--",tokenizer)

device = "cuda" if torch.cuda.is_available() else "cpu"

cap_model.to(device)

def generate_caption(processor, model, image, tokenizer=None):
    """Generate a single text caption for *image*.

    Args:
        processor: image processor (e.g. ``ViTImageProcessor``); must be
            callable as ``processor(images=..., return_tensors="pt")`` and,
            when *tokenizer* is None, must also expose ``batch_decode``.
        model: caption model exposing ``generate(pixel_values=...)``
            (e.g. ``VisionEncoderDecoderModel``).
        image: input image — presumably a PIL image or anything the
            processor accepts (TODO confirm against callers).
        tokenizer: optional tokenizer used to decode the generated token ids;
            when None, decoding falls back to the processor itself.

    Returns:
        The first decoded caption string, with special tokens stripped.
    """
    # Preprocess the image into model-ready pixel tensors on the module-level
    # inference device.
    inputs = processor(images=image, return_tensors="pt").to(device)

    generated_ids = model.generate(pixel_values=inputs.pixel_values)

    # Prefer an explicit tokenizer for decoding; some processors bundle one.
    decoder = tokenizer if tokenizer is not None else processor
    return decoder.batch_decode(generated_ids, skip_special_tokens=True)[0]

def predict_event(image):
    """Gradio handler: caption the uploaded image with the ViT+GPT-2 model.

    Thin wrapper binding the module-level processor, model, and tokenizer
    to ``generate_caption``; returns the caption string.
    """
    return generate_caption(feature_extractor, cap_model, image, tokenizer)




# @app.get("/metrics")
# async def get_metrics():
#     update_metrics()
#     return Response(media_type="text/plain", content= prom.generate_latest())



title = "capstone"
description = "final capstone"

out_response = gr.outputs.Textbox(label="Caption generated by ViT+GPT-2")

iface = gr.Interface(fn=predict_event, 
                         inputs=gr.inputs.Image(type="pil"),
                         outputs=out_response,
                         enable_queue=True)
    


# app = gr.mount_gradio_app(app, iface, path="/")

iface.launch(server_name = "0.0.0.0", server_port = 8001)

# if __name__ == "__main__":
    # Use this for debugging purposes only
 
    # uvicorn.run(app, host="0.0.0.0", port=8001)