import gradio as gr
from model.config import *
from PIL import Image
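
# Imports below are disabled; they belong to an optional FastAPI/Prometheus
# monitoring variant of this app, kept for reference.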
# import pickle
# import numpy as np
# from fastapi import FastAPI,Response
# from sklearn.metrics import accuracy_score, f1_score
# import prometheus_client as prom
# import pandas as pd
# import uvicorn
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
import torch
# Disabled: earlier pickle-based model and FastAPI app from the monitoring variant
# loaded_model = pickle.load(open(save_file_name, 'rb'))
# app=FastAPI()
# test_data=pd.read_csv("test.csv")
# f1_metric = prom.Gauge('death_f1_score', 'F1 score for test samples')
# Function for updating metrics
# def update_metrics():
#     test = test_data.sample(20)
#     X = test.iloc[:, :-1].values
#     y = test['DEATH_EVENT'].values
#     # test_text = test['Text'].values
#     test_pred = loaded_model.predict(X)
#     # pred_labels = [int(pred['label'].split("_")[1]) for pred in test_pred]
#     f1 = f1_score(y, test_pred).round(3)
#     # f1 = f1_score(test['labels'], pred_labels).round(3)
#     f1_metric.set(f1)
# Assemble the ViT encoder + GPT-2 decoder named in model.config. Note that
# from_encoder_decoder_pretrained stitches two pretrained checkpoints together
# with freshly initialized cross-attention weights.
model = VisionEncoderDecoderModel.from_encoder_decoder_pretrained(encoder._name_or_path, decoder._name_or_path)
tokenizer = AutoTokenizer.from_pretrained(decoder._name_or_path)
tokenizer.pad_token = tokenizer.unk_token  # GPT-2 defines no pad token; reuse <|endoftext|>
# predict_event below uses feature_extractor and device, which were only ever
# defined in the commented-out block; assuming the encoder's own ViT processor:
feature_extractor = ViTImageProcessor.from_pretrained(encoder._name_or_path)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
# Alternative: load a fine-tuned captioning checkpoint from a local "model" dir:
# feature_extractor = ViTImageProcessor.from_pretrained("model")
# cap_model = VisionEncoderDecoderModel.from_pretrained("model")
# tokenizer = AutoTokenizer.from_pretrained("model")
# def generate_caption(model, image, tokenizer=None):
#     inputs = feature_extractor(image, return_tensors="pt")  # 'inputs' was undefined in the original; assumed
#     generated_ids = model.generate(pixel_values=inputs.pixel_values)
#     print("generated_ids", generated_ids)
#     if tokenizer is not None:
#         print("tokenizer not null--", tokenizer)
#         generated_caption = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
#     else:
#         print("tokenizer null--", tokenizer)
#         generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
#     return generated_caption

def predict_event(image):
    # Gradio hands us a PIL image (inputs type="pil"), so no Image.open is
    # needed; convert to RGB to handle RGBA/grayscale uploads.
    img = image.convert("RGB")
    pixel_values = feature_extractor(img, return_tensors="pt").pixel_values.to(device)
    generated_ids = model.generate(pixel_values=pixel_values)
    generated_caption = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    # caption_vitgpt = generate_caption(model, image)
    # caption_vitgpt = generate_caption(feature_extractor, cap_model, image, tokenizer)
    # ANSI color escapes ('\033[96m') don't render in a Gradio textbox; return plain text.
    return generated_caption[:85]
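
# Quick local sanity check (hypothetical sample.jpg; not part of the Space's
# request flow) -- should print a short generated caption:
# print(predict_event(Image.open("sample.jpg")))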
# @app.get("/metrics")
# async def get_metrics():
#     update_metrics()
#     return Response(media_type="text/plain", content=prom.generate_latest())

title = "capstone"
description = "final capstone"
# gr.inputs/gr.outputs and the enable_queue argument were removed in modern
# Gradio; use the top-level components and queue() instead.
iface = gr.Interface(fn=predict_event,
                     inputs=gr.Image(type="pil"),
                     outputs=gr.Textbox(label="Caption generated by ViT+GPT-2"),
                     title=title,
                     description=description)
iface.queue()
# app = gr.mount_gradio_app(app, iface, path="/")
iface.launch(server_name="0.0.0.0", server_port=8001)
# if __name__ == "__main__":
#     # Use this for debugging purposes only
#     uvicorn.run(app, host="0.0.0.0", port=8001)