File size: 1,630 Bytes
c4e3ea5
 
6b0a154
 
 
 
c4e3ea5
6b0a154
a21c2eb
 
c4e3ea5
 
 
 
6b0a154
 
c4e3ea5
 
 
6b0a154
c4e3ea5
 
6b0a154
 
 
 
 
c4e3ea5
 
6b0a154
 
 
 
 
 
 
 
c4e3ea5
6b0a154
 
 
 
 
 
c4e3ea5
 
6b0a154
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
import torch
import time
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
from utils import create_pdf

# Choose the compute device up front, then fetch the pretrained BLIP
# captioning processor and model and place the model on that device.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

processor = BlipProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
).to(device)

def generate_caption(image: "Image.Image") -> str:
    """Generate a caption for one image using the module-level BLIP model.

    Args:
        image: Image object exposing PIL's ``mode`` / ``convert`` API. It is
            converted to RGB first when it is in any other mode.

    Returns:
        The decoded caption. If the whole call took longer than five
        seconds, the caption is prefixed with a warning stating the elapsed
        time in seconds.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or turn negative) by wall-clock adjustments the way time.time() can.
    started = time.perf_counter()

    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(images=image, return_tensors="pt").to(device)
    output = model.generate(**inputs, max_new_tokens=50)
    caption = processor.decode(output[0], skip_special_tokens=True)

    duration = time.perf_counter() - started
    if duration > 5:
        # Surface slow inferences to the user instead of hiding them.
        caption = f"⚠️ Took {round(duration, 2)}s: {caption}"

    return caption

def process_images(images):
    """Caption the uploaded images and bundle the captions into a PDF.

    Args:
        images: The uploads to caption. May be a list/tuple or a single
            item; each item may be an already-loaded PIL image, a path
            string, or a file-like object whose ``name`` attribute is the
            path on disk. ``None`` or an empty sequence means nothing was
            uploaded.

    Returns:
        A ``(summary, pdf_file)`` tuple: the captions joined by blank lines,
        and whatever ``create_pdf`` returns for those captions. When nothing
        was uploaded the summary is a short notice and ``pdf_file`` is
        ``None``.
    """
    max_images = 10  # Only the first ten uploads are captioned.

    # Nothing uploaded: slicing None would raise, so bail out early.
    if not images:
        return "No images were uploaded.", None

    # Tolerate a single upload arriving un-wrapped.
    if not isinstance(images, (list, tuple)):
        images = [images]

    results = []
    for index, upload in enumerate(images[:max_images], start=1):
        if isinstance(upload, Image.Image):
            caption = generate_caption(upload)
        else:
            # Uploaded files are path strings or temp-file wrappers, not
            # decoded images; resolve the path and let PIL open it.
            path = getattr(upload, "name", upload)
            with Image.open(path) as picture:
                caption = generate_caption(picture)
        results.append(f"Image {index}: {caption}")

    pdf_file = create_pdf(results)
    return "\n\n".join(results), pdf_file

# Web UI: a multi-file upload feeding process_images, which returns the text
# summary and the generated PDF.
iface = gr.Interface(
    fn=process_images,
    inputs=gr.File(
        label="Upload up to 10 Site Images",
        # Multi-file upload is selected with file_count; gr.File has no
        # `multiple` parameter.
        file_count="multiple",
        # NOTE(review): `type` is left at the component default because
        # "file" is only accepted by older Gradio releases — confirm against
        # the pinned Gradio version.
        file_types=[".jpg", ".png"],
    ),
    outputs=["text", "file"],
    title="Auto-DPR Generator from Site Images",
    description="Upload construction site images. AI will auto-generate a progress summary and downloadable PDF.",
    allow_flagging="never",
)

# Start the server only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()