import gradio as gr
import torch
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer

# Load the fine-tuned ViT+GPT-2 captioning model from the local "model" directory.
feature_extractor = ViTImageProcessor.from_pretrained("model")
cap_model = VisionEncoderDecoderModel.from_pretrained("model")
tokenizer = AutoTokenizer.from_pretrained("model")

# Run on GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
cap_model.to(device)


def generate_caption(processor, model, image, tokenizer=None):
    """Generate a caption for a PIL image with a VisionEncoderDecoder model."""
    inputs = processor(images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=16, num_beams=4)
    # Decode with the standalone tokenizer when one is supplied; otherwise fall
    # back to the processor, which works for combined processors that bundle
    # their own tokenizer.
    if tokenizer is not None:
        generated_caption = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    else:
        generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return generated_caption.strip()


def predict_event(image):
    return generate_caption(feature_extractor, cap_model, image, tokenizer)


title = "capstone"
description = "final capstone"

iface = gr.Interface(
    fn=predict_event,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(label="Caption generated by ViT+GPT-2"),
    title=title,
    description=description,
)

# Alternative deployment: mount the Gradio app inside a FastAPI app and serve it with uvicorn.
# from fastapi import FastAPI
# import uvicorn
# app = FastAPI()
# app = gr.mount_gradio_app(app, iface, path="/")
# uvicorn.run(app, host="0.0.0.0", port=8001)

iface.queue().launch(server_name="0.0.0.0", server_port=8001)
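# --- Local smoke test (a minimal sketch, kept commented out because launch()
# above blocks; "sample.jpg" is a hypothetical image path, not part of this
# app) ---
#
#   from PIL import Image
#   image = Image.open("sample.jpg").convert("RGB")
#   print(predict_event(image))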