Spaces:

mac9087
/

rightnight

Sleeping

App Files Files Community

mac9087 committed on Apr 23

Commit

74d32e4

verified ·

1 Parent(s): 2057821

Update app.py

Browse files

Files changed (1) hide show

app.py +192 -65

app.py CHANGED Viewed

@@ -11,11 +11,12 @@ import io
 import zipfile
 import uuid
 import traceback
-from diffusers import ShapEImg2ImgPipeline
-from diffusers.utils import export_to_obj
 from huggingface_hub import snapshot_download
 from flask_cors import CORS
 import functools
 app = Flask(__name__)
 CORS(app)  # Enable CORS for all routes
@@ -43,14 +44,14 @@ app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max
 processing_jobs = {}
 # Global model variable
-pipe = None
 model_loaded = False
 model_loading = False
 # Configuration for processing
-TIMEOUT_SECONDS = 300  # 5 minutes max for processing
 MAX_DIMENSION = 512    # Max image dimension to process
-MAX_INFERENCE_STEPS = 64  # Maximum allowed inference steps to prevent the index error
 # TimeoutError for handling timeouts
 class TimeoutError(Exception):
@@ -104,65 +105,167 @@ def preprocess_image(image_path):
                 new_width = int(img.width * (MAX_DIMENSION / img.height))
             img = img.resize((new_width, new_height), Image.LANCZOS)
-        # Convert to RGB and return
-        return img
 def load_model():
-    global pipe, model_loaded, model_loading
     if model_loaded:
-        return pipe
     if model_loading:
         # Wait for model to load if it's already in progress
         while model_loading and not model_loaded:
             time.sleep(0.5)
-        return pipe
     try:
         model_loading = True
         print("Starting model loading...")
-        model_name = "openai/shap-e-img2img"
-        # Download model with retry mechanism
-        max_retries = 3
-        retry_delay = 5
-        for attempt in range(max_retries):
-            try:
-                snapshot_download(
-                    repo_id=model_name,
-                    cache_dir=CACHE_DIR,
-                    resume_download=True,
-                )
-                break
-            except Exception as e:
-                if attempt < max_retries - 1:
-                    print(f"Download attempt {attempt+1} failed: {str(e)}. Retrying in {retry_delay} seconds...")
-                    time.sleep(retry_delay)
-                    retry_delay *= 2
-                else:
-                    raise
-        # Initialize pipeline with lower precision to save memory
         device = "cuda" if torch.cuda.is_available() else "cpu"
-        dtype = torch.float16 if device == "cuda" else torch.float32
-        pipe = ShapEImg2ImgPipeline.from_pretrained(
-            model_name,
-            torch_dtype=dtype,
-            cache_dir=CACHE_DIR,
-        )
-        pipe = pipe.to(device)
-        # Optimize for inference
-        if device == "cuda":
-            pipe.enable_model_cpu_offload()
         model_loaded = True
         print(f"Model loaded successfully on {device}")
-        return pipe
     except Exception as e:
         print(f"Error loading model: {str(e)}")
@@ -175,7 +278,7 @@ def load_model():
 def health_check():
     return jsonify({
         "status": "healthy",
-        "model": "Shap-E Image to 3D",
         "device": "cuda" if torch.cuda.is_available() else "cpu"
     }), 200
@@ -218,6 +321,32 @@ def progress(job_id):
     return Response(stream_with_context(generate()), mimetype='text/event-stream')
 @app.route('/convert', methods=['POST'])
 def convert_image_to_3d():
     # Check if image is in the request
@@ -234,7 +363,7 @@ def convert_image_to_3d():
     # Get optional parameters with defaults
     try:
         guidance_scale = float(request.form.get('guidance_scale', 3.0))
-        num_inference_steps = min(int(request.form.get('num_inference_steps', 64)), MAX_INFERENCE_STEPS)
         output_format = request.form.get('output_format', 'obj').lower()
     except ValueError:
         return jsonify({"error": "Invalid parameter values"}), 400
@@ -243,7 +372,7 @@ def convert_image_to_3d():
     if guidance_scale < 1.0 or guidance_scale > 5.0:
         return jsonify({"error": "Guidance scale must be between 1.0 and 5.0"}), 400
-    if num_inference_steps < 32 or num_inference_steps > MAX_INFERENCE_STEPS:
         num_inference_steps = min(num_inference_steps, MAX_INFERENCE_STEPS)
     # Validate output format
@@ -279,12 +408,12 @@ def convert_image_to_3d():
         try:
             # Preprocess image (resize if needed)
             processing_jobs[job_id]['progress'] = 5
-            image = preprocess_image(filepath)
             processing_jobs[job_id]['progress'] = 10
             # Load model
             try:
-                pipe = load_model()
                 processing_jobs[job_id]['progress'] = 30
             except Exception as e:
                 processing_jobs[job_id]['status'] = 'error'
@@ -294,12 +423,12 @@ def convert_image_to_3d():
             # Process image with thread-safe timeout
             try:
                 def generate_mesh():
-                    return pipe(
-                        image,
-                        guidance_scale=guidance_scale,
-                        num_inference_steps=num_inference_steps,
-                        output_type="mesh",
-                    ).images
                 images, error = process_with_timeout(generate_mesh, [], TIMEOUT_SECONDS)
@@ -324,13 +453,12 @@ def convert_image_to_3d():
             try:
                 if output_format == 'obj':
                     obj_path = os.path.join(output_dir, "model.obj")
-                    export_to_obj(images[0], obj_path)
                     # Create a zip file with OBJ and MTL
                     zip_path = os.path.join(output_dir, "model.zip")
                     with zipfile.ZipFile(zip_path, 'w') as zipf:
                         zipf.write(obj_path, arcname="model.obj")
-                        mtl_path = os.path.join(output_dir, "model.mtl")
                         if os.path.exists(mtl_path):
                             zipf.write(mtl_path, arcname="model.mtl")
@@ -338,13 +466,12 @@ def convert_image_to_3d():
                     processing_jobs[job_id]['preview_url'] = f"/preview/{job_id}"
                 elif output_format == 'glb':
-                    from trimesh import Trimesh
-                    mesh = images[0]
-                    vertices = mesh.verts
-                    faces = mesh.faces
                     # Create a trimesh object
-                    trimesh_obj = Trimesh(vertices=vertices, faces=faces)
                     # Export as GLB
                     glb_path = os.path.join(output_dir, "model.glb")
@@ -468,7 +595,7 @@ def cleanup_old_jobs():
 @app.route('/', methods=['GET'])
 def index():
     return jsonify({
-        "message": "Image to 3D API is running",
         "endpoints": ["/convert", "/progress/<job_id>", "/download/<job_id>", "/preview/<job_id>"]
     }), 200

 import zipfile
 import uuid
 import traceback
 from huggingface_hub import snapshot_download
 from flask_cors import CORS
 import functools
+import numpy as np
+import trimesh
+from scipy.spatial import Delaunay
 app = Flask(__name__)
 CORS(app)  # Enable CORS for all routes
 processing_jobs = {}
 # Global model variable
+neus_model = None
 model_loaded = False
 model_loading = False
 # Configuration for processing
+TIMEOUT_SECONDS = 180  # 3 minutes max for processing
 MAX_DIMENSION = 512    # Max image dimension to process
+MAX_INFERENCE_STEPS = 32  # Maximum allowed inference steps
 # TimeoutError for handling timeouts
 class TimeoutError(Exception):
                 new_width = int(img.width * (MAX_DIMENSION / img.height))
             img = img.resize((new_width, new_height), Image.LANCZOS)
+        # Convert to RGB and convert to tensor
+        img_array = np.array(img) / 255.0  # Normalize to [0, 1]
+        img_tensor = torch.from_numpy(img_array).float().permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
+        return img_tensor
+# Simple NeuS2-inspired implementation for reconstructing 3D surfaces from images
class NeuS2Model:
    """Small NeuS2-inspired network that turns one image into a coarse mesh.

    The image is encoded into a 512-dimensional feature vector. An MLP then
    maps ``(x, y, z, features)`` to one signed-distance value per point of a
    regular grid over ``[-1, 1]^3``, and a triangle surface is built from the
    grid points whose value lies close to the requested iso-level.

    NOTE: this class never loads a checkpoint, so both networks run with the
    weights torch assigns at construction time.
    """

    # Half-width of the band around the iso-level that counts as "near surface"
    _SURFACE_BAND = 0.1
    # Upper bound on the number of points handed to the Delaunay triangulation
    _MAX_SURFACE_POINTS = 1000
    # Number of grid points evaluated per network call (limits peak memory)
    _QUERY_BATCH = 32768

    def __init__(self, device="cuda" if torch.cuda.is_available() else "cpu"):
        """Build the encoder and the SDF network and move them to *device*."""
        self.device = device
        self.encoder = self._create_encoder().to(device)
        self.volume_network = self._create_volume_network().to(device)

    def _create_encoder(self):
        """Return the convolutional encoder: [B, 3, H, W] -> [B, 512]."""
        return torch.nn.Sequential(
            torch.nn.Conv2d(3, 32, 3, stride=2, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(32, 64, 3, stride=2, padding=1),
            torch.nn.ReLU(),
            torch.nn.Conv2d(64, 128, 3, stride=2, padding=1),
            torch.nn.ReLU(),
            # Fixed 8x8 pooling makes the flattened size 128 * 8 * 8 = 8192
            # regardless of the input resolution.
            torch.nn.AdaptiveAvgPool2d((8, 8)),
            torch.nn.Flatten(),
            torch.nn.Linear(8192, 512)
        )

    def _create_volume_network(self):
        """Return the MLP mapping 3 coordinates + 512 features to one SDF value."""
        return torch.nn.Sequential(
            torch.nn.Linear(515, 256),  # 512 features + 3 coordinates
            torch.nn.ReLU(),
            torch.nn.Linear(256, 256),
            torch.nn.ReLU(),
            torch.nn.Linear(256, 1)  # SDF value
        )

    def extract_features(self, image):
        """Encode *image* ([B, 3, H, W]) to features on ``self.device``, without gradients."""
        with torch.no_grad():
            return self.encoder(image.to(self.device))

    def query_points(self, points, features):
        """Evaluate the SDF network at *points* conditioned on *features*.

        Args:
            points: tensor of shape [batch, num_points, 3].
            features: tensor of shape [batch, 512].

        Returns:
            Tensor of shape [batch, num_points, 1] on ``self.device``.
        """
        batch_size, num_points, _ = points.shape
        # The sampling grid is created on the CPU while the features come from
        # the encoder on self.device; torch.cat needs both on the same device.
        points = points.to(self.device)
        features = features.to(self.device)
        # Repeat the per-image feature vector for every query point
        features = features.unsqueeze(1).expand(-1, num_points, -1)  # [batch, num_points, 512]
        points_features = torch.cat([points, features], dim=-1)  # [batch, num_points, 515]
        points_features = points_features.reshape(-1, points_features.shape[-1])
        with torch.no_grad():
            sdf = self.volume_network(points_features)
        return sdf.reshape(batch_size, num_points, 1)

    def generate_mesh(self, image, resolution=64, threshold=0.0, num_steps=16):
        """Predict an SDF grid for *image* and triangulate its surface.

        Args:
            image: tensor of shape [1, 3, H, W].
            resolution: number of grid samples per axis over [-1, 1].
            threshold: SDF iso-level at which the surface is extracted.
            num_steps: accepted for interface compatibility; not used by the
                computation.

        Returns:
            A one-element list holding an object with ``verts`` and ``faces``
            attributes (list format kept to match ShapE's output format).
        """
        from types import SimpleNamespace

        features = self.extract_features(image)  # [1, 512]

        # Regular grid over [-1, 1]^3
        axis = torch.linspace(-1, 1, resolution)
        grid_x, grid_y, grid_z = torch.meshgrid(axis, axis, axis, indexing='ij')
        points = torch.stack([grid_x, grid_y, grid_z], dim=-1).reshape(1, -1, 3)  # [1, res^3, 3]

        # Process in batches to avoid OOM
        sdf_values = [
            self.query_points(points[:, start:start + self._QUERY_BATCH], features)
            for start in range(0, points.shape[1], self._QUERY_BATCH)
        ]
        sdf_volume = (
            torch.cat(sdf_values, dim=1)
            .reshape(resolution, resolution, resolution)
            .cpu()
            .numpy()
        )

        vertices, faces = self._marching_cubes(sdf_volume, threshold)
        return [SimpleNamespace(verts=vertices, faces=faces)]

    def _marching_cubes(self, sdf_volume, threshold=0.0):
        """Build a triangle mesh from the grid points near the iso-surface.

        This is not a real marching-cubes implementation: it collects the grid
        points whose SDF is within a small band of *threshold* and returns the
        triangulated convex hull of those points. For production, use proper
        marching cubes (e.g. from scikit-image).

        Args:
            sdf_volume: 3-D numpy array of SDF samples over [-1, 1] per axis.
            threshold: iso-level of the surface.

        Returns:
            ``(vertices, faces)`` where ``vertices`` is [V, 3] and ``faces`` is
            an [F, 3] array of vertex indices. A unit cube is returned when
            fewer than 4 points are near the surface, and a single square when
            the triangulation fails.
        """
        # Find points near the surface (relative to the requested iso-level)
        x, y, z = np.where(np.abs(sdf_volume - threshold) < self._SURFACE_BAND)

        if len(x) < 4:  # Need at least 4 points for Delaunay
            # Create a simple cube if not enough points
            vertices = np.array([
                [-0.5, -0.5, -0.5],
                [0.5, -0.5, -0.5],
                [0.5, 0.5, -0.5],
                [-0.5, 0.5, -0.5],
                [-0.5, -0.5, 0.5],
                [0.5, -0.5, 0.5],
                [0.5, 0.5, 0.5],
                [-0.5, 0.5, 0.5]
            ])
            # Every triangle is wound so that its normal points out of the cube
            faces = np.array([
                [0, 2, 1], [0, 3, 2],  # Bottom face
                [4, 5, 6], [4, 6, 7],  # Top face
                [0, 1, 5], [0, 5, 4],  # Front face
                [2, 3, 7], [2, 7, 6],  # Back face
                [0, 7, 3], [0, 4, 7],  # Left face
                [1, 2, 6], [1, 6, 5]   # Right face
            ])
            return vertices, faces

        # Convert indices to coordinates in [-1, 1]. Index n - 1 must map to
        # +1 to agree with linspace(-1, 1, n), hence the division by n - 1.
        spans = np.maximum(np.array(sdf_volume.shape) - 1, 1)
        points = np.stack([x, y, z], axis=1) * (2.0 / spans) - 1.0

        # Limit to a reasonable number of points for Delaunay
        if len(points) > self._MAX_SURFACE_POINTS:
            keep = np.random.choice(len(points), self._MAX_SURFACE_POINTS, replace=False)
            points = points[keep]

        try:
            # In 3-D, Delaunay simplices are tetrahedra (4 indices each); the
            # convex hull facets are the triangles usable as mesh faces.
            hull_faces = Delaunay(points).convex_hull
        except Exception:
            # Deliberate best-effort: fall back to a simple shape if Delaunay
            # fails (e.g. all points coplanar).
            return (
                np.array([[-1, -1, -1], [1, -1, -1], [1, 1, -1], [-1, 1, -1]]),
                np.array([[0, 1, 2], [0, 2, 3]]),
            )

        # Drop points that no face references and renumber the faces to match
        used, remapped = np.unique(hull_faces, return_inverse=True)
        return points[used], remapped.reshape(hull_faces.shape)
 def load_model():
+    global neus_model, model_loaded, model_loading
     if model_loaded:
+        return neus_model
     if model_loading:
         # Wait for model to load if it's already in progress
         while model_loading and not model_loaded:
             time.sleep(0.5)
+        return neus_model
     try:
         model_loading = True
         print("Starting model loading...")
         device = "cuda" if torch.cuda.is_available() else "cpu"
+        neus_model = NeuS2Model(device=device)
         model_loaded = True
         print(f"Model loaded successfully on {device}")
+        return neus_model
     except Exception as e:
         print(f"Error loading model: {str(e)}")
 def health_check():
     # Returns a JSON body with a fixed "healthy" status, the model label, and
     # "cuda" or "cpu" depending on torch.cuda.is_available(); always HTTP 200.
     return jsonify({
         "status": "healthy",
+        "model": "NeuS2 Image to 3D",
         "device": "cuda" if torch.cuda.is_available() else "cpu"
     }), 200
     return Response(stream_with_context(generate()), mimetype='text/event-stream')
def export_to_obj(mesh, obj_path):
    """Write *mesh* as a Wavefront OBJ file plus a companion MTL file.

    Args:
        mesh: object exposing ``verts`` (iterable of xyz triples) and
            ``faces`` (iterable of 0-based vertex index sequences). Only the
            first three indices of each face are written, i.e. every face is
            emitted as a triangle.
        obj_path: destination path of the OBJ file.

    Returns:
        ``(obj_path, mtl_path)``; the MTL file is written next to the OBJ
        file, with the same base name and a ``.mtl`` extension.
    """
    import os

    # Swap only the extension: str.replace would also rewrite any ".obj"
    # occurring in a directory name, and would leave a path without ".obj"
    # unchanged so that the MTL file overwrote the OBJ file.
    mtl_path = os.path.splitext(obj_path)[0] + '.mtl'
    if mtl_path == obj_path:
        mtl_path = obj_path + '.mtl'

    with open(obj_path, 'w') as f:
        # Reference the material library so that viewers actually apply it
        f.write(f"mtllib {os.path.basename(mtl_path)}\n")
        # Write vertices
        f.writelines(f"v {v[0]} {v[1]} {v[2]}\n" for v in mesh.verts)
        f.write("usemtl material0\n")
        # Write faces (OBJ uses 1-indexed vertices)
        f.writelines(
            f"f {face[0]+1} {face[1]+1} {face[2]+1}\n" for face in mesh.faces
        )

    # Create a simple MTL file
    with open(mtl_path, 'w') as f:
        f.write("newmtl material0\n")
        f.write("Ka 1.0 1.0 1.0\n")  # ambient color
        f.write("Kd 0.8 0.8 0.8\n")  # diffuse color
        f.write("Ks 0.0 0.0 0.0\n")  # specular color
        f.write("Ns 0.0\n")          # specular exponent
        f.write("illum 2\n")         # illumination model

    return obj_path, mtl_path
 @app.route('/convert', methods=['POST'])
 def convert_image_to_3d():
     # Check if image is in the request
     # Get optional parameters with defaults
     try:
         guidance_scale = float(request.form.get('guidance_scale', 3.0))
+        num_inference_steps = min(int(request.form.get('num_inference_steps', 32)), MAX_INFERENCE_STEPS)
         output_format = request.form.get('output_format', 'obj').lower()
     except ValueError:
         return jsonify({"error": "Invalid parameter values"}), 400
     if guidance_scale < 1.0 or guidance_scale > 5.0:
         return jsonify({"error": "Guidance scale must be between 1.0 and 5.0"}), 400
+    if num_inference_steps < 16 or num_inference_steps > MAX_INFERENCE_STEPS:
         num_inference_steps = min(num_inference_steps, MAX_INFERENCE_STEPS)
     # Validate output format
         try:
             # Preprocess image (resize if needed)
             processing_jobs[job_id]['progress'] = 5
+            image_tensor = preprocess_image(filepath)
             processing_jobs[job_id]['progress'] = 10
             # Load model
             try:
+                model = load_model()
                 processing_jobs[job_id]['progress'] = 30
             except Exception as e:
                 processing_jobs[job_id]['status'] = 'error'
             # Process image with thread-safe timeout
             try:
                 def generate_mesh():
+                    return model.generate_mesh(
+                        image_tensor,
+                        resolution=min(32 + num_inference_steps, 64),  # Adjust resolution based on steps
+                        threshold=0.0,
+                        num_steps=num_inference_steps
+                    )
                 images, error = process_with_timeout(generate_mesh, [], TIMEOUT_SECONDS)
             try:
                 if output_format == 'obj':
                     obj_path = os.path.join(output_dir, "model.obj")
+                    obj_path, mtl_path = export_to_obj(images[0], obj_path)
                     # Create a zip file with OBJ and MTL
                     zip_path = os.path.join(output_dir, "model.zip")
                     with zipfile.ZipFile(zip_path, 'w') as zipf:
                         zipf.write(obj_path, arcname="model.obj")
                         if os.path.exists(mtl_path):
                             zipf.write(mtl_path, arcname="model.mtl")
                     processing_jobs[job_id]['preview_url'] = f"/preview/{job_id}"
                 elif output_format == 'glb':
+                    # Convert to trimesh format
+                    vertices = images[0].verts
+                    faces = images[0].faces
                     # Create a trimesh object
+                    trimesh_obj = trimesh.Trimesh(vertices=vertices, faces=faces)
                     # Export as GLB
                     glb_path = os.path.join(output_dir, "model.glb")
 @app.route('/', methods=['GET'])
 def index():
     # Root endpoint: returns a JSON banner message together with the list of
     # endpoint paths this API advertises; always HTTP 200.
     return jsonify({
+        "message": "Image to 3D API using NeuS2 is running",
         "endpoints": ["/convert", "/progress/<job_id>", "/download/<job_id>", "/preview/<job_id>"]
     }), 200