import os
import sys
import subprocess
# Clone the repository if it is not already present
if not os.path.exists("edge_vlm"):
    subprocess.run(["git", "clone", "https://huggingface.co/irotem98/edge_vlm"], check=True)

# Install the dependencies listed in the cloned repository
subprocess.run(["pip", "install", "-r", "edge_vlm/requirements.txt"], check=True)

# Make the cloned repository importable without changing the working directory
sys.path.insert(0, './edge_vlm')
# Now import the model from the cloned repository
from model import MoondreamModel
import torch
import gradio as gr
# Load the model and tokenizer
model = MoondreamModel.load_model()
tokenizer = MoondreamModel.load_tokenizer()
# Define the default question
default_question = "Describe the image."
# Function to handle image and return generated caption
def generate_caption_with_default(image):
    # Preprocess the image
    preprocessed_image = MoondreamModel.preprocess_image(image)
    # Generate caption
    caption = MoondreamModel.generate_caption(model, preprocessed_image, tokenizer)
    return caption
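# Example of calling the function directly (a sketch; assumes a local file
# named example.jpg exists and Pillow is available via requirements.txt):
#   from PIL import Image
#   print(generate_caption_with_default(Image.open("example.jpg")))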
# Create Gradio interface
interface = gr.Interface(
    fn=generate_caption_with_default,
    inputs=gr.Image(type="pil", label="Upload an Image"),  # gr.inputs.Image was removed in newer Gradio releases
    outputs="text",
    title="Image Caption Generator",
    description=f"The default question is: '{default_question}'. Upload an image to generate a description."
)
# Launch the interface
interface.launch()
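# Note: launch() uses Gradio's defaults here; for local debugging you could
# instead pass standard options such as interface.launch(share=True) or
# interface.launch(server_name="0.0.0.0") (not used in this app).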