File size: 1,698 Bytes
8ce2c13
241af22
892d0c8
 
 
 
 
 
 
ec1a268
892d0c8
241af22
 
 
723250f
241af22
 
 
 
 
 
 
 
 
bb8bac0
c3cad0a
892d0c8
c3cad0a
892d0c8
c3cad0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
import shutil
import subprocess
import sys

# --- Start-up bootstrap: fetch the model repository and make it importable ---
# Everything below runs at import time, before the model module is imported.
source_dir = "edge_vlm"
destination_dir = "."

# Clone the repository if not already present.
# check=True raises CalledProcessError on a failed clone instead of letting
# the script continue and fail later with a less obvious error.
if not os.path.exists(source_dir):
    subprocess.run(
        ["git", "clone", "https://huggingface.co/irotem98/edge_vlm"],
        check=True,
    )

# Install the required dependencies.
# "sys.executable -m pip" targets the interpreter that is running this script;
# a bare "pip" on PATH may belong to a different Python installation.
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "-r",
        os.path.join(source_dir, "requirements.txt"),
    ],
    check=True,
)

# Copy all top-level files from the clone into the current directory.
# NOTE(review): existing files with the same name are overwritten — confirm
# the repository does not ship a file that would replace this script.
for filename in os.listdir(source_dir):
    source_file = os.path.join(source_dir, filename)
    destination_file = os.path.join(destination_dir, filename)

    # Only regular files are copied; directories such as .git are skipped.
    if os.path.isfile(source_file):
        shutil.copy(source_file, destination_file)

# Now import the model from the copied files
from model import MoondreamModel
import torch
import gradio as gr

# Prompt text shown to the user in the interface description.
default_question = "Describe the image."

# Build the model and its tokenizer once at start-up; the caption handler
# reads these module-level objects on every call.
model = MoondreamModel.load_model()
tokenizer = MoondreamModel.load_tokenizer()

# Function to handle image and return generated caption
def generate_caption_with_default(image):
    """Return a caption for ``image`` using the module-level model.

    Args:
        image: The picture delivered by the Gradio image input, or ``None``
            when the request was submitted without an image.

    Returns:
        The value returned by ``MoondreamModel.generate_caption``, or a short
        instruction string when no image was supplied.
    """
    # Gradio hands the callback None when nothing was uploaded (or the image
    # was cleared); answer with a hint instead of passing None to the model.
    if image is None:
        return "Please upload an image to generate a description."

    # Preprocess the image
    preprocessed_image = MoondreamModel.preprocess_image(image)

    # Generate caption
    # NOTE(review): default_question is not passed to generate_caption;
    # presumably the model applies its own default prompt — confirm.
    return MoondreamModel.generate_caption(model, preprocessed_image, tokenizer)

# Create Gradio interface.
# gr.Image is the top-level component class. The legacy gr.inputs namespace
# was deprecated in Gradio 3 and removed in Gradio 4, where gr.inputs.Image
# raises AttributeError.
interface = gr.Interface(
    fn=generate_caption_with_default,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs="text",
    title="Image Caption Generator",
    description=f"The default question is: '{default_question}'. Upload an image to generate a description.",
)

# Launch the interface
interface.launch()