Spaces:
Sleeping
Sleeping
File size: 1,630 Bytes
c4e3ea5 6b0a154 c4e3ea5 6b0a154 a21c2eb c4e3ea5 6b0a154 c4e3ea5 6b0a154 c4e3ea5 6b0a154 c4e3ea5 6b0a154 c4e3ea5 6b0a154 c4e3ea5 6b0a154 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import gradio as gr
import torch
import time
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
from utils import create_pdf
# Choose the compute device first (GPU when torch reports CUDA, otherwise CPU),
# then load the BLIP captioning processor and model from the same checkpoint
# and move the model onto that device.
device = "cuda" if torch.cuda.is_available() else "cpu"
_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_CHECKPOINT).to(device)
def generate_caption(image):
    """Caption a single image with the module-level BLIP model.

    Args:
        image: An image object exposing ``.mode`` and ``.convert`` (a PIL
            image); it is converted to RGB when it is in any other mode.

    Returns:
        The decoded caption string. When captioning took longer than five
        seconds, the caption is prefixed with a warning showing the elapsed
        time rounded to two decimals.
    """
    began = time.time()

    rgb_image = image if image.mode == "RGB" else image.convert("RGB")

    # Preprocess into tensors and move them to the same device as the model.
    encoded = processor(images=rgb_image, return_tensors="pt")
    encoded = encoded.to(device)

    generated = model.generate(**encoded, max_new_tokens=50)
    first_sequence = generated[0]
    caption = processor.decode(first_sequence, skip_special_tokens=True)

    elapsed = time.time() - began
    if elapsed > 5:
        # Surface slow inference to the user instead of hiding it.
        return f"⚠️ Took {round(elapsed, 2)}s: {caption}"
    return caption
def _open_upload(upload):
    """Return a PIL image for one uploaded item (PIL image, path, or file handle)."""
    if isinstance(upload, Image.Image):
        # Already decoded; nothing to open.
        return upload
    # File-like uploads (e.g. temp-file wrappers) expose their on-disk path as
    # ``.name``; prefer that path so PIL manages the file handle itself.
    path = getattr(upload, "name", None) if hasattr(upload, "read") else None
    return Image.open(path if path is not None else upload)


def process_images(images):
    """Caption up to ten uploaded images and build a PDF report from them.

    Args:
        images: The uploads to caption. May be a list/tuple of items or a
            single item; each item may be a PIL image, a filesystem path, or
            a file-like object. ``None`` or an empty list means nothing was
            uploaded.

    Returns:
        A ``(summary, pdf_file)`` tuple. ``summary`` is the per-image
        captions joined by blank lines and ``pdf_file`` is whatever
        ``create_pdf`` returns for those captions. When nothing was
        uploaded, returns a short notice and ``None`` without creating a PDF.
    """
    if not images:
        return "No images were uploaded.", None
    if not isinstance(images, (list, tuple)):
        # A single-file upload arrives as one object rather than a list.
        images = [images]

    results = []
    for index, upload in enumerate(images[:10], start=1):  # Limit to 10 images
        caption = generate_caption(_open_upload(upload))
        results.append(f"Image {index}: {caption}")

    pdf_file = create_pdf(results)
    return "\n\n".join(results), pdf_file
# Gradio UI: a multi-file upload feeds process_images, which returns the text
# summary and the generated PDF for download.
iface = gr.Interface(
    fn=process_images,
    inputs=gr.File(
        label="Upload up to 10 Site Images",
        # NOTE(review): "file" is the Gradio 3.x spelling; newer Gradio
        # releases expect "filepath" — confirm against the pinned version.
        type="file",
        file_types=[".jpg", ".png"],
        # gr.File selects multi-file upload via file_count; it has no
        # ``multiple`` parameter.
        file_count="multiple",
    ),
    outputs=["text", "file"],
    title="Auto-DPR Generator from Site Images",
    description="Upload construction site images. AI will auto-generate a progress summary and downloadable PDF.",
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch()
|