Spaces:

taarhissian
/

projectcostestimator

Sleeping

App Files Files Community

taarhissian committed on Mar 20

Commit

7303878

verified ·

1 Parent(s): eca9640

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -63

app.py CHANGED Viewed

@@ -1,73 +1,41 @@
-import easyocr
-from PIL import Image
-import re
 import gradio as gr
-import numpy as np  # Ensure numpy is imported
-# Initialize the OCR reader
-reader = easyocr.Reader(['en'])
-# Define patterns for different room types and dimensions
-room_patterns = {
-    'bedroom': r'bedroom|bed\s?rm',
-    'bathroom': r'bathroom|bath\s?rm',
-    'kitchen': r'kitchen',
-    'living room': r'living\s?room|sitting\s?room',
-    'dining room': r'dining\s?room',
-    # Add more patterns as needed
-}
-dimension_pattern = r"(\d+'\s?\d+\")|(\d+\.?\d*\s?[x×]\s?\d+\.?\d*)"  # Pattern to match dimensions like 10'6" or 10x12
-# Function to extract room data with counts and measurements
-def extract_room_data(results, room_patterns, dimension_pattern):
-    room_data = {}
-    for result in results:
-        text = result[1].lower()  # Extract the text from the OCR result
-        for room_type, pattern in room_patterns.items():
-            if re.search(pattern, text):
-                # Check if room type is already in the dictionary
-                if room_type not in room_data:
-                    room_data[room_type] = {"count": 0, "measurements": []}
-                room_data[room_type]["count"] += 1
-                # Find dimensions in the text
-                dimensions = re.findall(dimension_pattern, text)
-                if dimensions:
-                    room_data[room_type]["measurements"].extend(dimensions)
-    return room_data
-# Function to process the uploaded image
-def process_image(image):
-    # Convert the Gradio image to PIL Image
-    image = Image.fromarray(image)
-    # Perform OCR
-    results = reader.readtext(np.array(image), detail=0)
-    # Extract room information
-    room_data_with_counts = extract_room_data(results, room_patterns, dimension_pattern)
-    # Format output for display
-    output_text = "Extracted Room Data with Counts and Measurements:\n"
-    for room_type, data in room_data_with_counts.items():
-        output_text += f"- {room_type.capitalize()}:\n"
-        output_text += f"  Count: {data['count']}\n"
-        output_text += f"  Measurements: {', '.join(data['measurements'])}\n"
-    return output_text
-# Create the Gradio interface
 iface = gr.Interface(
-    fn=process_image,
-    inputs=gr.Image(type="pil"),  # Use type="pil" for PIL Image input
-    outputs="text",
-    title="Floor Plan Room Detection",
-    description="Upload a floor plan image to extract room information."
 )
-# Launch the interface
-iface.launch(share=True)  # Set share=True to get a shareable link

+import torch
+from transformers import AutoProcessor, AutoModelForVision2Seq
 import gradio as gr
+from PIL import Image
+# Load the Kosmos-2 processor and model once at import time so that every
+# call to analyze_image reuses the same instances.
+MODEL_NAME = "microsoft/kosmos-2-patch14-224"
+processor = AutoProcessor.from_pretrained(MODEL_NAME)
+model = AutoModelForVision2Seq.from_pretrained(MODEL_NAME)
+# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
+# `device` is also used by analyze_image to place the input tensors.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+def analyze_image(image, prompt):
+    """Generate a text response for an image and a text prompt with Kosmos-2.
+
+    Args:
+        image: Image as a NumPy array (the Gradio input uses type="numpy"),
+            or None when no image was supplied.
+        prompt: Text prompt passed to the processor alongside the image.
+
+    Returns:
+        The first decoded sequence produced by the model, or a string
+        starting with "Error: " if the request cannot be processed.
+    """
+    # The image input may be None (e.g. submitted without an upload); report
+    # that clearly instead of surfacing an opaque conversion error.
+    if image is None:
+        return "Error: no image provided."
+    try:
+        image = Image.fromarray(image)  # Convert to PIL Image
+        inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
+        # Cap the number of *generated* tokens. `max_length` counts the input
+        # tokens too, and the processed prompt (text plus the image
+        # placeholder tokens the processor inserts) can already reach 50,
+        # leaving little or no room for a response.
+        output = model.generate(**inputs, max_new_tokens=50)
+        result_text = processor.batch_decode(output, skip_special_tokens=True)[0]
+        return result_text
+    except Exception as e:
+        # UI boundary: show the failure in the output box rather than
+        # letting the request crash.
+        return f"Error: {str(e)}"
+# Gradio interface: an image (delivered as a NumPy array) and a text prompt
+# are passed to analyze_image; its return value is shown in one text box.
 iface = gr.Interface(
+    fn=analyze_image,
+    inputs=[gr.Image(type="numpy"), gr.Textbox(label="Prompt")],
+    outputs=gr.Textbox(label="Generated Response"),
+    title="Kosmos-2 Image Reasoning",
+    description="Upload an image and provide a text prompt. Kosmos-2 will generate insights based on the image and text input.",
 )
+# Start the Gradio app with default launch settings.
+iface.launch()