Abe committed
Commit f90e7b1 · 1 Parent(s): f9091c4
Files changed (4):
  1. .gitignore +2 -0
  2. README.md +4 -2
  3. app.py +69 -0
  4. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .idea
+ .venv
README.md CHANGED
@@ -7,7 +7,9 @@ sdk: gradio
  sdk_version: 5.27.1
  app_file: app.py
  pinned: false
- short_description: generate video prompts from text or text-image
+ short_description: generate video prompts or captions from text-image
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ A CPU-based image labeller built on `Salesforce/blip-image-captioning-base`, which can be used for training-data generation.
+
+ [Justlab.ai](https://justlab.ai)
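The README's "prompts or captions" wording maps to BLIP's two captioning modes: unconditional (free caption) and conditional (caption steered by a text prefix). A minimal sketch of both outside the Gradio wrapper, assuming a local `frame.jpg` exists:

```python
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

image = Image.open("frame.jpg").convert("RGB")  # hypothetical local file

# Unconditional caption: the model describes the image freely.
inputs = processor(image, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=50)
print(processor.decode(out[0], skip_special_tokens=True))

# Conditional caption: a text prefix steers the output, e.g. toward a video prompt.
inputs = processor(image, text="a video of", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=50)
print(processor.decode(out[0], skip_special_tokens=True))
```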
app.py ADDED
@@ -0,0 +1,69 @@
+ import gradio as gr
+ import torch
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ from PIL import Image
+ import numpy as np
+
+ # Initialize model and processor globally; the base checkpoint is much smaller than the -large variant
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+ # Move to GPU if available, otherwise stay on CPU
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model.to(device)
+
+ def process_input(image, text=""):
+     """Process an image and optional text input to generate a description"""
+     try:
+         # Convert numpy array to PIL Image
+         if isinstance(image, np.ndarray):
+             pil_image = Image.fromarray(image)
+         else:
+             return "Please provide a valid image"
+
+         # Use the prompt as a conditional prefix if provided
+         conditional_text = text if text else "a video of"
+
+         # Preprocess the image and prompt
+         inputs = processor(
+             pil_image,
+             text=conditional_text,
+             return_tensors="pt"
+         ).to(device)
+
+         # Generate with beam search and a repetition penalty
+         output = model.generate(
+             **inputs,
+             max_new_tokens=100,
+             num_beams=5,
+             length_penalty=1.0,
+             repetition_penalty=1.5
+         )
+
+         # Decode tokens back to text
+         result = processor.decode(output[0], skip_special_tokens=True)
+
+         return result.strip()
+
+     except Exception as e:
+         return f"Error processing input: {str(e)}"
+
+ # Create Gradio interface
+ demo = gr.Interface(
+     fn=process_input,
+     inputs=[
+         gr.Image(type="numpy", label="Upload Image"),
+         gr.Textbox(
+             label="Prompt (Optional)",
+             placeholder="Guide the description or leave empty for automatic caption",
+             lines=2
+         ),
+     ],
+     outputs=gr.Textbox(label="Generated Description", lines=6),
+     title="Scene Description Generator",
+     description="Upload an image and optionally add a prompt to guide the description. Created by <a href='https://justlab.ai'>Justlab.ai</a>",
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
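Once `demo.launch()` is running, the interface can also be driven programmatically. A sketch using `gradio_client`; the local URL, the test image path, and the `/predict` endpoint name (the `gr.Interface` default) are assumptions, not part of this commit:

```python
from gradio_client import Client, handle_file

client = Client("http://127.0.0.1:7860/")  # assumed local launch URL
result = client.predict(
    handle_file("frame.jpg"),  # hypothetical test image for the gr.Image input
    "a video of",              # the optional prompt textbox
    api_name="/predict",       # gr.Interface's default endpoint name
)
print(result)
```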
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio==5.27.1
+ # Model requirements
+ transformers>=4.45.0
+ Pillow~=11.2.1
+ requests
+ torch~=2.7.0
+ numpy~=2.2.5
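For the training-data generation use case the README mentions, these same pinned dependencies can drive a simple offline batch loop without the Gradio UI. A sketch, where the `images/` input directory and the TSV output file are hypothetical:

```python
from pathlib import Path
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Caption every JPEG and write image/caption pairs as TSV training labels.
with open("labels.tsv", "w") as f:
    for path in sorted(Path("images").glob("*.jpg")):
        inputs = processor(Image.open(path).convert("RGB"), return_tensors="pt")
        out = model.generate(**inputs, max_new_tokens=50)
        caption = processor.decode(out[0], skip_special_tokens=True)
        f.write(f"{path.name}\t{caption}\n")
```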