Spaces:

greydenim
/

ui-convert2

Runtime error

App Files Files Community

chriswang09 committed on Mar 16

Commit

3ac452f

1 Parent(s): 84db4ad

first commit

Browse files

Files changed (3) hide show

Dockerfile +30 -0
app.py +68 -0
requirements.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,30 @@

+# Container image for the screenshot-to-React Gradio app.
+# NOTE(review): Hugging Face Docker Spaces reportedly run the container as UID 1000;
+# confirm that the model/cache directories used at runtime are writable by that user.
+# Base image: official slim Python 3.10
+FROM python:3.10-slim
+# Keep apt from prompting for input during package installation
+ENV DEBIAN_FRONTEND=noninteractive
+# System packages: the Tesseract OCR engine (called through pytesseract) and libGL.
+# "libgl1" is used instead of "libgl1-mesa-glx": the latter is only a transitional
+# package and is not available on newer Debian releases, which breaks the build.
+RUN apt-get update && apt-get install -y \
+    tesseract-ocr \
+    libtesseract-dev \
+    libgl1 \
+    && rm -rf /var/lib/apt/lists/*
+# All following paths are relative to /app
+WORKDIR /app
+# Copy requirements.txt on its own so the dependency layer is cached across code changes
+COPY requirements.txt .
+# Install Python dependencies without keeping pip's download cache in the image
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application source
+COPY . .
+# Port the Gradio server listens on (app.py launches on 7860)
+EXPOSE 7860
+# Start the Gradio application
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import gradio as gr
+import torch
+import cv2
+import numpy as np
+import json
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import pytesseract
+# Load Object Detection Pipeline
+obj_detect = pipeline("object-detection", model="facebook/detr-resnet-50", device=-1)
+# Load Qwen for Code Generation
+MODEL_NAME = "Qwen/Qwen2.5-Coder-3B"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME, torch_dtype=dtype, device_map="auto"
+)
+# Define the process_image function (same as your original logic)
+def process_image(img):
+    opencv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+    img_height, img_width, _ = opencv_image.shape
+    # Run Object Detection
+    detections = obj_detect(img)
+    # Run OCR
+    text_data = pytesseract.image_to_string(opencv_image)
+    ui_json = {
+        "id": "generated-ui",
+        "name": "Generated UI",
+        "components": [],
+        "ocr_text": text_data.strip()
+    }
+    for det in detections:
+        ui_json["components"].append({
+            "id": f"{det['label']}-{len(ui_json['components']) + 1}",
+            "name": det["label"].capitalize(),
+            "confidence": round(det["score"], 2),
+        })
+    metadata_str = json.dumps(ui_json, indent=2)
+    # Generate React Code
+    prompt = f"Generate a React component from this metadata:\n{metadata_str}"
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    with torch.no_grad():
+        output = model.generate(**inputs, max_length=1024)
+    code_response = tokenizer.decode(output[0], skip_special_tokens=True)
+    return metadata_str, code_response
+# Gradio Interface
+interface = gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil"),
+    outputs=["text", "text"],
+    title="Screenshot → Metadata & React Code",
+    description="Upload a UI screenshot and get structured metadata + React code.",
+)
+# Run in Docker with 0.0.0.0 to allow external access
+interface.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+gradio
+torch
+opencv-python-headless
+numpy
+transformers
+pytesseract
+timm
+accelerate>=0.26.0