mac9087 committed
Commit 8dac441 · verified · 1 Parent(s): a1d5bed

Update app.py

Files changed (1)
1. app.py +378 -616

app.py CHANGED
@@ -11,19 +11,14 @@ import io
 import zipfile
 import uuid
 import traceback
-from huggingface_hub import snapshot_download, hf_hub_download, login
+from huggingface_hub import snapshot_download
 from flask_cors import CORS
 import numpy as np
 import trimesh
-from scipy.ndimage import gaussian_filter
+from transformers import pipeline
+from scipy.ndimage import gaussian_filter, uniform_filter, median_filter
+from scipy import interpolate
 import cv2
-import torch.nn.functional as F
-
-# Try to login with token if available
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
-if HF_TOKEN:
-    print("Logging in with Hugging Face token")
-    login(token=HF_TOKEN)

 app = Flask(__name__)
 CORS(app)  # Enable CORS for all routes
@@ -43,7 +38,6 @@ os.makedirs(CACHE_DIR, exist_ok=True)
 os.environ['HF_HOME'] = CACHE_DIR
 os.environ['TRANSFORMERS_CACHE'] = os.path.join(CACHE_DIR, 'transformers')
 os.environ['HF_DATASETS_CACHE'] = os.path.join(CACHE_DIR, 'datasets')
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'  # Limit CUDA memory splits

 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max
@@ -52,16 +46,11 @@ app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max
 processing_jobs = {}

 # Global model variables
-depth_model = None
-feature_extractor = None
-openlrm_model = None
+depth_estimator = None
 model_loaded = False
 model_loading = False

-# Flag to control whether to use simplified mode (for Hugging Face Spaces)
-USE_SIMPLIFIED_MODE = os.environ.get('USE_SIMPLIFIED_MODE', 'false').lower() == 'true'
-
-# Constants for processing
+# Configuration for processing
 TIMEOUT_SECONDS = 240  # 4 minutes max for processing
 MAX_DIMENSION = 512  # Max image dimension to process

@@ -99,16 +88,10 @@ def process_with_timeout(function, args, timeout)

     return result[0], None

-def optimize_memory():
-    """Free up memory to avoid OOM errors"""
-    gc.collect()
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-
 def allowed_file(filename):
     return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

-# Enhanced image preprocessing
+# Enhanced image preprocessing with better detail preservation
 def preprocess_image(image_path):
     with Image.open(image_path) as img:
         img = img.convert("RGB")
@@ -123,13 +106,14 @@ def preprocess_image(image_path)
                 new_height = MAX_DIMENSION
                 new_width = int(img.width * (MAX_DIMENSION / img.height))

-            # Use high-quality Lanczos resampling
+            # Use high-quality Lanczos resampling for better detail preservation
            img = img.resize((new_width, new_height), Image.LANCZOS)

         # Convert to numpy array for additional preprocessing
         img_array = np.array(img)

-        # Apply adaptive histogram equalization for better contrast
+        # Optional: Apply adaptive histogram equalization for better contrast
+        # This helps the depth model detect more details
         if len(img_array.shape) == 3 and img_array.shape[2] == 3:
             # Convert to LAB color space
             lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
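The hunk above cuts off inside the contrast-enhancement block. For reference, a minimal sketch of the full LAB-space CLAHE step the function performs on the L channel only; the clip limit and tile grid are assumed typical values, not taken from this diff:

    import cv2
    import numpy as np

    def apply_clahe_rgb(img_array: np.ndarray) -> np.ndarray:
        # Equalize contrast on the L (lightness) channel so colors are preserved
        lab = cv2.cvtColor(img_array, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        lab = cv2.merge((clahe.apply(l), a, b))
        return cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)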
@@ -150,419 +134,73 @@ def preprocess_image(image_path)

         return img

-# Try to remove background - simplified version that won't fail if rembg is not available
-def remove_background(image):
-    """Remove background if rembg is available, otherwise return original image"""
-    try:
-        import rembg
-        return rembg.remove(image)
-    except ImportError:
-        print("Rembg not available, skipping background removal")
-        # Create a copy of the image with RGBA
-        if isinstance(image, Image.Image):
-            if image.mode != 'RGBA':
-                return image.convert('RGBA')
-        return image
-
-# Function to select available models - checks which models are accessible
-def select_available_model():
-    """Try to find an available public model for depth estimation"""
-    public_models = [
-        "facebook/dpt-hybrid-midas",  # Public DPT model
-        "Intel/dpt-large",  # Intel's DPT model
-        "facebook/dinov2-base",  # General vision model
-    ]
-
-    # Try each model in turn
-    for model_name in public_models:
-        try:
-            print(f"Testing model availability: {model_name}")
-            # Just try to download the config to check if accessible
-            from transformers import AutoConfig
-            AutoConfig.from_pretrained(model_name, force_download=False)
-            print(f"Model {model_name} is available")
-            return model_name
-        except Exception as e:
-            print(f"Model {model_name} not available: {str(e)}")
-
-    print("No suitable models found. Using manual depth map generation.")
-    return None
-
-# Updated OpenLRM loading with fallback to simplified model
-def load_openlrm_model():
-    global openlrm_model, model_loaded, model_loading
-
-    if model_loaded and openlrm_model is not None:
-        return openlrm_model
+def load_model():
+    global depth_estimator, model_loaded, model_loading
+
+    if model_loaded:
+        return depth_estimator

     if model_loading:
         # Wait for model to load if it's already in progress
         while model_loading and not model_loaded:
             time.sleep(0.5)
-        return openlrm_model
+        return depth_estimator

     try:
         model_loading = True
-        print("Initializing 3D model generator...")
-
-        # Device selection - prefer CUDA if available
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        # Instead of using OpenLRM which is problematic on Spaces, create a simpler wrapper
-        # This will generate basic 3D structure without requiring complex models
-        class Simple3DWrapper:
-            def __init__(self, device):
-                self.device = device
-                print(f"Initialized simple 3D wrapper on {device}")
-
-            def __call__(self, image):
-                """Create a 3D mesh representation from an image"""
-                # Generate a depth map without complex models
-                depth_map = create_simple_depth_map(image)
-
-                # Convert depth map to vertices and faces
-                h, w = depth_map.shape
-                vertices = []
-
-                # Create vertices - scale to [-1, 1] range for x and y
-                scale_factor = 2.0
-                for i in range(h):
-                    for j in range(w):
-                        x = (j / w - 0.5) * scale_factor
-                        y = (i / h - 0.5) * scale_factor
-                        z = depth_map[i, j] * scale_factor * -1  # Negative to make closer objects "pop out"
-                        vertices.append([x, y, z])
-
-                # Create faces - connect neighboring vertices
-                faces = []
-                for i in range(h-1):
-                    for j in range(w-1):
-                        v0 = i * w + j
-                        v1 = i * w + (j + 1)
-                        v2 = (i + 1) * w + j
-                        v3 = (i + 1) * w + (j + 1)
-
-                        # Two triangles per grid cell
-                        faces.append([v0, v1, v3])
-                        faces.append([v0, v3, v2])
-
-                return {
-                    "vertices": np.array(vertices),
-                    "faces": np.array(faces)
-                }
-
-        # Create the 3D model wrapper
-        openlrm_model = Simple3DWrapper(device)
-
-        model_loaded = True
-        print(f"Simple 3D model generator initialized on {device}")
-        return openlrm_model
-
-    except Exception as e:
-        print(f"Error initializing 3D model: {str(e)}")
-        print(traceback.format_exc())
-        return None
-    finally:
-        model_loading = False
-
-# Updated depth model loading with public model support
-def load_depth_model():
-    global depth_model, feature_extractor, model_loaded, model_loading
-
-    if depth_model is not None and feature_extractor is not None:
-        return depth_model, feature_extractor
-
-    try:
-        print("Loading depth estimation model...")
-
-        # Select an available public model
-        model_name = select_available_model()
-
-        if model_name is None:
-            print("No suitable depth model found. Using manual depth map generation.")
-            return None, None
-
-        # Device selection
+        print("Starting model loading...")
+
+        # Using DPT-Large which provides better detail than DPT-Hybrid
+        # Alternatively, consider "vinvino02/glpn-nyu" for different detail characteristics
+        model_name = "Intel/dpt-large"
+
+        # Download model with retry mechanism
+        max_retries = 3
+        retry_delay = 5
+
+        for attempt in range(max_retries):
+            try:
+                snapshot_download(
+                    repo_id=model_name,
+                    cache_dir=CACHE_DIR,
+                    resume_download=True,
+                )
+                break
+            except Exception as e:
+                if attempt < max_retries - 1:
+                    print(f"Download attempt {attempt+1} failed: {str(e)}. Retrying in {retry_delay} seconds...")
+                    time.sleep(retry_delay)
+                    retry_delay *= 2
+                else:
+                    raise
+
+        # Initialize model with appropriate precision
         device = "cuda" if torch.cuda.is_available() else "cpu"

-        # Import appropriate model class for the selected model
-        if "dpt" in model_name.lower():
-            from transformers import DPTForDepthEstimation, DPTFeatureExtractor
-            print(f"Loading DPT model: {model_name}")
-            feature_extractor = DPTFeatureExtractor.from_pretrained(model_name, token=HF_TOKEN)
-            depth_model = DPTForDepthEstimation.from_pretrained(model_name, token=HF_TOKEN)
-        elif "dinov2" in model_name.lower():
-            from transformers import AutoFeatureExtractor, AutoModel
-            print(f"Loading DINOv2 model: {model_name}")
-            feature_extractor = AutoFeatureExtractor.from_pretrained(model_name, token=HF_TOKEN)
-            depth_model = AutoModel.from_pretrained(model_name, token=HF_TOKEN)
-        else:
-            # Generic loading
-            from transformers import AutoFeatureExtractor, AutoModelForDepthEstimation
-            print(f"Loading Auto depth model: {model_name}")
-            feature_extractor = AutoFeatureExtractor.from_pretrained(model_name, token=HF_TOKEN)
-            depth_model = AutoModelForDepthEstimation.from_pretrained(model_name, token=HF_TOKEN)
-
-        # Move to appropriate device
+        # Load depth estimator pipeline
+        depth_estimator = pipeline(
+            "depth-estimation",
+            model=model_name,
+            device=device if device == "cuda" else -1,
+            cache_dir=CACHE_DIR
+        )
+
+        # Optimize memory usage
         if device == "cuda":
-            depth_model = depth_model.to(device)
-
-        print(f"Depth model loaded successfully on {device}")
-        return depth_model, feature_extractor
-
-    except Exception as e:
-        print(f"Error loading depth model: {str(e)}")
-        print(traceback.format_exc())
-        print("Using manual depth map generation instead.")
-        return None, None
-
-# Create a simple depth map without ML models
-def create_simple_depth_map(image):
-    """Create a simple depth map from image without ML models"""
-    # Convert to numpy array if needed
-    if isinstance(image, Image.Image):
-        img_array = np.array(image)
-    else:
-        img_array = image
-
-    # Convert to grayscale
-    if len(img_array.shape) == 3 and img_array.shape[2] >= 3:
-        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
-    else:
-        gray = img_array.astype(np.uint8)
-
-    # Apply edge detection
-    edges = cv2.Canny(gray, 100, 200)
-
-    # Create depth map using blur and edges
-    depth_map = cv2.GaussianBlur(gray, (15, 15), 0)
-
-    # Combine with edges to preserve details
-    depth_map = depth_map.astype(float) / 255.0
-    edges = edges.astype(float) / 255.0
-
-    # Edges should be deeper in the depth map
-    depth_map = depth_map * (1.0 - 0.5 * edges)
-
-    # Center objects usually closer to viewer (create a radial gradient)
-    h, w = depth_map.shape
-    center_y, center_x = h // 2, w // 2
-    y, x = np.ogrid[:h, :w]
-    dist_from_center = np.sqrt((x - center_x)**2 + (y - center_y)**2)
-    max_dist = np.sqrt(center_x**2 + center_y**2)
-    dist_factor = dist_from_center / max_dist
-
-    # Apply center bias - center is closer (lower depth values)
-    depth_map = depth_map + 0.3 * dist_factor
-
-    # Normalize
-    depth_map = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-10)
-
-    # Smooth the depth map to avoid artifacts
-    depth_map = gaussian_filter(depth_map, sigma=1.0)
-
-    return depth_map
-
-# Process image to create 3D model using simplified approach
-def process_openlrm(image, job_id, detail_level='medium', output_format='obj'):
-    try:
-        # Load OpenLRM model - now returns simplified 3D generator
-        model = load_openlrm_model()
-        if model is None:
-            # Fallback to depth-based approach
-            return process_depth_based(image, job_id, detail_level, output_format)
-
-        # Preprocess image - remove background for better results
-        processing_jobs[job_id]['progress'] = 20
-        image_rgba = remove_background(image)
-
-        # Update progress
-        processing_jobs[job_id]['progress'] = 40
-
-        # Process with model to get 3D mesh
-        result = model(image_rgba)
-
-        # Update progress
-        processing_jobs[job_id]['progress'] = 60
-
-        # Convert model result to trimesh format
-        mesh = convert_to_trimesh(result, image)
-
-        # Update progress
-        processing_jobs[job_id]['progress'] = 80
-
-        # Return the created mesh
-        return mesh
-
-    except Exception as e:
-        print(f"Error in OpenLRM processing: {str(e)}")
-        print(traceback.format_exc())
-        # Fallback to depth-based approach if OpenLRM fails
-        return process_depth_based(image, job_id, detail_level, output_format)
-
-# Convert OpenLRM result to trimesh
-def convert_to_trimesh(result, image):
-    # Use the provided vertices and faces from the model result
-    vertices = np.array(result.get("vertices", []))
-    faces = np.array(result.get("faces", []))
-
-    # Create a default mesh if needed
-    if len(vertices) == 0 or len(faces) == 0:
-        # Generate sample vertices and faces
-        x = np.linspace(-1, 1, 20)
-        y = np.linspace(-1, 1, 20)
-        z = np.linspace(-1, 1, 10)
-
-        # Create grid points
-        xx, yy = np.meshgrid(x, y)
-        zz = np.zeros_like(xx)
-
-        # Create a simple height field
-        vertices = np.vstack([xx.flatten(), yy.flatten(), zz.flatten()]).T
-
-        # Create faces
-        faces = []
-        n = 20  # Grid size
-        for i in range(n-1):
-            for j in range(n-1):
-                idx = i*n + j
-                faces.append([idx, idx+1, idx+n])
-                faces.append([idx+1, idx+n+1, idx+n])
-        faces = np.array(faces)
-
-    # Create mesh with provided data
-    mesh = trimesh.Trimesh(vertices=vertices, faces=faces)
-
-    # Add texture from the original image
-    if hasattr(image, 'convert'):
-        try:
-            img_array = np.array(image.convert("RGBA"))
-            if img_array.shape[2] == 4:  # RGBA
-                vertex_colors = sample_texture_from_image(img_array, vertices)
-                mesh.visual.vertex_colors = vertex_colors
-        except Exception as e:
-            print(f"Error applying texture: {e}")
-
-    return mesh
-
-# Sample helper functions for mesh creation
-def sample_texture_from_image(image, vertices):
-    """Sample colors from image based on vertex positions"""
-    h, w = image.shape[:2]
-    colors = np.zeros((len(vertices), 4), dtype=np.uint8)
-
-    # Find the range of vertex positions
-    min_x, min_y = vertices[:, 0].min(), vertices[:, 1].min()
-    max_x, max_y = vertices[:, 0].max(), vertices[:, 1].max()
-
-    # Normalize vertex positions to [0,1] for sampling
-    for i, v in enumerate(vertices):
-        # Map from vertex coordinates to image coordinates
-        x_norm = (v[0] - min_x) / (max_x - min_x) if max_x > min_x else 0.5
-        y_norm = (v[1] - min_y) / (max_y - min_y) if max_y > min_y else 0.5
-
-        # Clamp to valid range
-        x_norm = max(0, min(1, x_norm))
-        y_norm = max(0, min(1, y_norm))
-
-        # Convert to image coordinates
-        x = int(x_norm * (w-1))
-        y = int(y_norm * (h-1))
-
-        # Sample color
-        if 0 <= x < w and 0 <= y < h:
-            colors[i] = image[y, x]
-        else:
-            colors[i] = [200, 200, 200, 255]  # Default color
-
-    return colors
-
-# Process using depth-based approach as fallback
-def process_depth_based(image, job_id, detail_level='medium', output_format='obj'):
-    try:
-        # Load depth model
-        depth_model_result = load_depth_model()
-
-        # Update progress
-        processing_jobs[job_id]['progress'] = 30
-
-        # Check if model loading was successful
-        if depth_model_result[0] is None:
-            # Use manual depth map generation
-            print("Using manual depth map generation")
-            depth_map = create_simple_depth_map(image)
-        else:
-            # Extract model and feature extractor
-            depth_model, feature_extractor = depth_model_result
-
-            # Get depth map from model
-            with torch.no_grad():
-                # Prepare image for the model
-                inputs = feature_extractor(images=image, return_tensors="pt")
-                if torch.cuda.is_available():
-                    inputs = {k: v.cuda() for k, v in inputs.items()}
-
-                # Forward pass
-                outputs = depth_model(**inputs)
-
-                # Different models have different output formats
-                if hasattr(outputs, "predicted_depth"):
-                    predicted_depth = outputs.predicted_depth
-                elif hasattr(outputs, "logits"):  # For some models
-                    predicted_depth = outputs.logits
-                else:
-                    # Generic handling - take the first output tensor
-                    predicted_depth = list(outputs.values())[0]
-
-                # Resize depth to original image size
-                depth_map = F.interpolate(
-                    predicted_depth.unsqueeze(1),
-                    size=(image.height, image.width),
-                    mode="bicubic",
-                    align_corners=False,
-                ).squeeze().cpu().numpy()
-
-        # Update progress
-        processing_jobs[job_id]['progress'] = 60
-
-        # Normalize depth map if from model
-        if 'depth_map' not in locals():
-            depth_min = depth_map.min()
-            depth_max = depth_map.max()
-            depth_normalized = (depth_map - depth_min) / (depth_max - depth_min + 1e-10)
-        else:
-            depth_normalized = depth_map
-
-        # Create mesh from depth map
-        mesh = depth_to_mesh(depth_normalized, image,
-                             resolution=100 if detail_level == 'medium' else
-                             150 if detail_level == 'high' else 80,
-                             detail_level=detail_level)
-
-        # Update progress
-        processing_jobs[job_id]['progress'] = 80
-
-        # Clean up to free memory
-        optimize_memory()
-
-        return mesh
-
+            torch.cuda.empty_cache()
+
+        model_loaded = True
+        print(f"Model loaded successfully on {device}")
+        return depth_estimator
+
     except Exception as e:
-        print(f"Error in depth-based processing: {str(e)}")
+        print(f"Error loading model: {str(e)}")
         print(traceback.format_exc())
-
-        # Ultimate fallback - create a simple mesh from the image
-        try:
-            print("Using emergency fallback mesh generation")
-            depth_map = create_simple_depth_map(image)
-            mesh = depth_to_mesh(depth_map, image, resolution=50, detail_level='low')
-            return mesh
-        except Exception as fallback_error:
-            print(f"Fallback mesh generation failed: {fallback_error}")
-            raise
+        raise
+    finally:
+        model_loading = False

-# Enhanced depth map processing
+# Enhanced depth processing function to improve detail quality
 def enhance_depth_map(depth_map, detail_level='medium'):
     """Apply sophisticated processing to enhance depth map details"""
     # Convert to numpy array if needed
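For orientation, this is roughly how the depth_estimator created by the new load_model() gets invoked elsewhere in the file; a minimal standalone sketch (the model id mirrors this diff, the rest is a plain transformers pipeline call):

    import numpy as np
    from PIL import Image
    from transformers import pipeline

    depth_estimator = pipeline("depth-estimation", model="Intel/dpt-large", device=-1)  # -1 = CPU
    result = depth_estimator(Image.open("input.jpg").convert("RGB"))
    depth_map = np.array(result["depth"])  # result["depth"] is a PIL Image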
@@ -576,7 +214,7 @@ def enhance_depth_map(depth_map, detail_level='medium')
     # Create a copy for processing
     enhanced_depth = depth_map.copy().astype(np.float32)

-    # Remove outliers using percentile clipping
+    # Remove outliers using percentile clipping (more stable than min/max)
     p_low, p_high = np.percentile(enhanced_depth, [1, 99])
     enhanced_depth = np.clip(enhanced_depth, p_low, p_high)

@@ -585,26 +223,33 @@ def enhance_depth_map(depth_map, detail_level='medium')

     # Apply different enhancement methods based on detail level
     if detail_level == 'high':
-        # Apply unsharp masking for edge enhancement
+        # Apply unsharp masking for edge enhancement - simulating Hunyuan's detail technique
+        # First apply gaussian blur
         blurred = gaussian_filter(enhanced_depth, sigma=1.5)
+        # Create the unsharp mask
         mask = enhanced_depth - blurred
+        # Apply the mask with strength factor
         enhanced_depth = enhanced_depth + 1.5 * mask

-        # Apply bilateral filter simulation
+        # Apply bilateral filter to preserve edges while smoothing noise
+        # Simulate using gaussian combinations
         smooth1 = gaussian_filter(enhanced_depth, sigma=0.5)
         smooth2 = gaussian_filter(enhanced_depth, sigma=2.0)
         edge_mask = enhanced_depth - smooth2
         enhanced_depth = smooth1 + 1.2 * edge_mask

     elif detail_level == 'medium':
-        # Less aggressive enhancement
+        # Less aggressive but still effective enhancement
+        # Apply mild unsharp masking
         blurred = gaussian_filter(enhanced_depth, sigma=1.0)
         mask = enhanced_depth - blurred
         enhanced_depth = enhanced_depth + 0.8 * mask
+
+        # Apply mild smoothing to reduce noise but preserve edges
         enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.5)

     else:  # low
-        # Just apply noise reduction
+        # Just apply noise reduction without too much detail enhancement
        enhanced_depth = gaussian_filter(enhanced_depth, sigma=0.7)

     # Normalize again after processing
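The 'high' and 'medium' branches above are classic unsharp masking, sharpened = x + k * (x - blur(x)); a self-contained sketch with the same constants as the 'high' branch (the input array is illustrative):

    import numpy as np
    from scipy.ndimage import gaussian_filter

    depth = np.random.rand(64, 64).astype(np.float32)  # stand-in for a depth map
    blurred = gaussian_filter(depth, sigma=1.5)
    sharpened = depth + 1.5 * (depth - blurred)  # k = 1.5, as in the 'high' branch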
@@ -612,9 +257,9 @@ def enhance_depth_map(depth_map, detail_level='medium')

     return enhanced_depth

-# Improved depth to mesh conversion with better detail
+# Convert depth map to 3D mesh with significantly enhanced detail
 def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium'):
-    """Convert depth map to 3D mesh with improved detail preservation"""
+    """Convert depth map to 3D mesh with highly improved detail preservation"""
     # First, enhance the depth map for better details
     enhanced_depth = enhance_depth_map(depth_map, detail_level)

@@ -626,31 +271,50 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium')
     y = np.linspace(0, h-1, resolution)
     x_grid, y_grid = np.meshgrid(x, y)

-    # Sample depth at grid points
-    from scipy import interpolate
+    # Use bicubic interpolation for smoother surface with better details
+    # Create interpolation function
     interp_func = interpolate.RectBivariateSpline(
         np.arange(h), np.arange(w), enhanced_depth, kx=3, ky=3
     )
+
+    # Sample depth at grid points with the interpolation function
     z_values = interp_func(y, x, grid=True)

+    # Apply a post-processing step to enhance small details even further
+    if detail_level == 'high':
+        # Calculate local gradients to detect edges
+        dx = np.gradient(z_values, axis=1)
+        dy = np.gradient(z_values, axis=0)
+
+        # Enhance edges by increasing depth differences at high gradient areas
+        gradient_magnitude = np.sqrt(dx**2 + dy**2)
+        edge_mask = np.clip(gradient_magnitude * 5, 0, 0.2)  # Scale and limit effect
+
+        # Apply edge enhancement
+        z_values = z_values + edge_mask * (z_values - gaussian_filter(z_values, sigma=1.0))
+
+    # Normalize z-values with advanced scaling for better depth impression
+    z_min, z_max = np.percentile(z_values, [2, 98])  # Remove outliers
+    z_values = (z_values - z_min) / (z_max - z_min) if z_max > z_min else z_values
+
     # Apply depth scaling appropriate to the detail level
     if detail_level == 'high':
-        z_scaling = 2.5  # More pronounced depth
+        z_scaling = 2.5  # More pronounced depth variations
     elif detail_level == 'medium':
         z_scaling = 2.0  # Standard depth
     else:
-        z_scaling = 1.5  # Subtle depth
+        z_scaling = 1.5  # More subtle depth variations

     z_values = z_values * z_scaling

-    # Normalize coordinates
+    # Normalize x and y coordinates
     x_grid = (x_grid / w - 0.5) * 2.0  # Map to -1 to 1
     y_grid = (y_grid / h - 0.5) * 2.0  # Map to -1 to 1

     # Create vertices
     vertices = np.vstack([x_grid.flatten(), -y_grid.flatten(), -z_values.flatten()]).T

-    # Create faces (triangles)
+    # Create faces (triangles) with optimized winding for better normals
     faces = []
     for i in range(resolution-1):
         for j in range(resolution-1):
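The spline resampling above evaluates a bicubic surface fitted to the full-resolution depth map at a coarse mesh grid; a minimal sketch of just that step (shapes are illustrative):

    import numpy as np
    from scipy import interpolate

    h, w, resolution = 480, 640, 100
    depth = np.random.rand(h, w)  # stand-in for the enhanced depth map
    x = np.linspace(0, w - 1, resolution)
    y = np.linspace(0, h - 1, resolution)
    spline = interpolate.RectBivariateSpline(np.arange(h), np.arange(w), depth, kx=3, ky=3)
    z_values = spline(y, x, grid=True)  # (resolution, resolution) bicubic samples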
@@ -659,167 +323,104 @@ def depth_to_mesh(depth_map, image, resolution=100, detail_level='medium')
             p3 = (i + 1) * resolution + j
             p4 = (i + 1) * resolution + (j + 1)

-            # Standard triangulation
-            faces.append([p1, p2, p4])
-            faces.append([p1, p4, p3])
+            # Calculate normals to ensure consistent orientation
+            v1 = vertices[p1]
+            v2 = vertices[p2]
+            v3 = vertices[p3]
+            v4 = vertices[p4]
+
+            # Calculate normals for both possible triangulations
+            # and choose the one that's more consistent
+            norm1 = np.cross(v2-v1, v4-v1)
+            norm2 = np.cross(v4-v3, v1-v3)
+
+            if np.dot(norm1, norm2) >= 0:
+                # Standard triangulation
+                faces.append([p1, p2, p4])
+                faces.append([p1, p4, p3])
+            else:
+                # Alternative triangulation for smoother surface
+                faces.append([p1, p2, p3])
+                faces.append([p2, p4, p3])

     faces = np.array(faces)

     # Create mesh
     mesh = trimesh.Trimesh(vertices=vertices, faces=faces)

-    # Apply texturing if image is provided
+    # Apply advanced texturing if image is provided
     if image is not None:
         # Convert to numpy array if needed
         if isinstance(image, Image.Image):
             img_array = np.array(image)
         else:
             img_array = image

-        # Create vertex colors
-        if len(img_array.shape) >= 2:
-            # Create vertex colors by sampling the image
+        # Create vertex colors with improved sampling
+        if resolution <= img_array.shape[0] and resolution <= img_array.shape[1]:
+            # Create vertex colors by sampling the image with bilinear interpolation
             vertex_colors = np.zeros((vertices.shape[0], 4), dtype=np.uint8)

+            # Get normalized coordinates for sampling
             for i in range(resolution):
                 for j in range(resolution):
-                    # Calculate image coordinates
-                    img_x = min(max(0, int(j * (img_array.shape[1] - 1) / (resolution - 1))), img_array.shape[1] - 1)
-                    img_y = min(max(0, int(i * (img_array.shape[0] - 1) / (resolution - 1))), img_array.shape[0] - 1)
+                    # Calculate exact image coordinates with proper scaling
+                    img_x = j * (img_array.shape[1] - 1) / (resolution - 1)
+                    img_y = i * (img_array.shape[0] - 1) / (resolution - 1)
+
+                    # Bilinear interpolation for smooth color transitions
+                    x0, y0 = int(img_x), int(img_y)
+                    x1, y1 = min(x0 + 1, img_array.shape[1] - 1), min(y0 + 1, img_array.shape[0] - 1)
+
+                    # Calculate interpolation weights
+                    wx = img_x - x0
+                    wy = img_y - y0

                     vertex_idx = i * resolution + j

                     if len(img_array.shape) == 3 and img_array.shape[2] == 3:  # RGB
-                        r, g, b = img_array[img_y, img_x]
-                        vertex_colors[vertex_idx] = [r, g, b, 255]
+                        # Perform bilinear interpolation for each color channel
+                        r = int((1-wx)*(1-wy)*img_array[y0, x0, 0] + wx*(1-wy)*img_array[y0, x1, 0] +
+                                (1-wx)*wy*img_array[y1, x0, 0] + wx*wy*img_array[y1, x1, 0])
+                        g = int((1-wx)*(1-wy)*img_array[y0, x0, 1] + wx*(1-wy)*img_array[y0, x1, 1] +
+                                (1-wx)*wy*img_array[y1, x0, 1] + wx*wy*img_array[y1, x1, 1])
+                        b = int((1-wx)*(1-wy)*img_array[y0, x0, 2] + wx*(1-wy)*img_array[y0, x1, 2] +
+                                (1-wx)*wy*img_array[y1, x0, 2] + wx*wy*img_array[y1, x1, 2])
+
+                        vertex_colors[vertex_idx, :3] = [r, g, b]
+                        vertex_colors[vertex_idx, 3] = 255  # Alpha
                     elif len(img_array.shape) == 3 and img_array.shape[2] == 4:  # RGBA
-                        vertex_colors[vertex_idx] = img_array[img_y, img_x]
+                        for c in range(4):  # For each RGBA channel
+                            vertex_colors[vertex_idx, c] = int((1-wx)*(1-wy)*img_array[y0, x0, c] +
+                                                               wx*(1-wy)*img_array[y0, x1, c] +
+                                                               (1-wx)*wy*img_array[y1, x0, c] +
+                                                               wx*wy*img_array[y1, x1, c])
                     else:
-                        # Handle grayscale
-                        gray = img_array[img_y, img_x]
-                        if np.isscalar(gray):
-                            vertex_colors[vertex_idx] = [gray, gray, gray, 255]
-                        else:
-                            # Just in case gray is some kind of array
-                            gray_val = np.mean(gray)
-                            vertex_colors[vertex_idx] = [gray_val, gray_val, gray_val, 255]
+                        # Handle grayscale with bilinear interpolation
+                        gray = int((1-wx)*(1-wy)*img_array[y0, x0] + wx*(1-wy)*img_array[y0, x1] +
+                                   (1-wx)*wy*img_array[y1, x0] + wx*wy*img_array[y1, x1])
+                        vertex_colors[vertex_idx, :3] = [gray, gray, gray]
+                        vertex_colors[vertex_idx, 3] = 255

             mesh.visual.vertex_colors = vertex_colors

     # Apply smoothing to get rid of staircase artifacts
     if detail_level != 'high':
-        try:
-            # Use laplacian smoothing if available
-            mesh = mesh.smoothed(method='laplacian', iterations=1)
-        except Exception as e:
-            print(f"Smoothing error (non-critical): {e}")
+        # For medium and low detail, apply Laplacian smoothing
+        # but preserve the overall shape
+        mesh = mesh.smoothed(method='laplacian', iterations=1)

-    # Fix normals for better rendering
-    try:
-        mesh.fix_normals()
-    except Exception as e:
-        print(f"Normal fixing error (non-critical): {e}")
-
-    # Simulate full 3D by duplicating and flipping the mesh only if detail level is higher
-    if detail_level == 'high' and not USE_SIMPLIFIED_MODE:
-        try:
-            # Create a complete 3D object by duplicating and flipping the mesh
-            back_mesh = mesh.copy()
-            # Flip to create the back side
-            back_mesh.vertices[:, 2] = -back_mesh.vertices[:, 2] - 0.1  # Add small offset to avoid z-fighting
-            # Fix normals after flipping
-            back_mesh.fix_normals()
-
-            # Combine front and back meshes
-            combined_mesh = trimesh.util.concatenate([mesh, back_mesh])
-
-            # Add side panels to create a watertight model
-            combined_mesh = create_watertight_model(combined_mesh)
-            return combined_mesh
-        except Exception as e:
-            print(f"3D completion error (non-critical): {e}")
+    # Calculate and fix normals for better rendering
+    mesh.fix_normals()

     return mesh

-# Create a watertight model by adding side panels
-def create_watertight_model(mesh):
-    try:
-        # Extract boundary edges - simplified approach to avoid errors
-        edges = mesh.edges_unique
-        edge_faces = mesh.edges_face
-        boundary_edges = []
-
-        # Find edges that are only part of one face (boundaries)
-        edge_face_counts = np.bincount(edge_faces.flatten(), minlength=len(mesh.faces))
-        boundary_face_indices = np.where(edge_face_counts == 1)[0]
-
-        # Get boundary edges
-        for i, edge in enumerate(edges):
-            faces = edge_faces[i]
-            if -1 in faces or len(np.unique(faces)) == 1:
-                boundary_edges.append(edge)
-
-        # If no boundary edges, return the original mesh
-        if len(boundary_edges) == 0:
-            return mesh
-
-        # Simplify for Hugging Face Space - just return original mesh
-        if USE_SIMPLIFIED_MODE:
-            return mesh
-
-        # Create side panels along boundary edges - simplified version
-        new_faces = []
-        new_vertices = mesh.vertices.copy()
-
-        # Just add a base and close the model
-        min_z = np.min(mesh.vertices[:, 2])
-        max_z = np.max(mesh.vertices[:, 2])
-
-        # Find vertices near the minimum z height
-        bottom_vertices = np.where(np.isclose(mesh.vertices[:, 2], min_z, atol=0.1))[0]
-
-        if len(bottom_vertices) > 3:
-            # Create a simple bottom face - simplified approach
-            center = np.mean(mesh.vertices[bottom_vertices], axis=0)
-            center_idx = len(new_vertices)
-            new_vertices = np.vstack([new_vertices, center])
-
-            # Add triangles connecting the boundary vertices to the center
-            for i in range(len(bottom_vertices)-1):
-                new_faces.append([bottom_vertices[i], bottom_vertices[i+1], center_idx])
-
-            # Close the loop
-            new_faces.append([bottom_vertices[-1], bottom_vertices[0], center_idx])
-
-        # Create new mesh with added faces
-        if len(new_faces) > 0:
-            new_faces = np.array(new_faces)
-            combined_faces = np.vstack([mesh.faces, new_faces])
-            watertight_mesh = trimesh.Trimesh(vertices=new_vertices, faces=combined_faces)
-
-            # Copy vertex colors if they exist
-            if hasattr(mesh.visual, 'vertex_colors') and mesh.visual.vertex_colors is not None:
-                # Extend vertex colors array for new vertices
-                extended_colors = np.vstack([
-                    mesh.visual.vertex_colors,
-                    np.full((len(new_vertices) - len(mesh.vertices), 4), [200, 200, 200, 255], dtype=np.uint8)
-                ])
-                watertight_mesh.visual.vertex_colors = extended_colors
-
-            return watertight_mesh
-
-        return mesh
-    except Exception as e:
-        print(f"Watertight model creation failed (non-critical): {e}")
-        return mesh
-
 @app.route('/health', methods=['GET'])
 def health_check():
     return jsonify({
         "status": "healthy",
-        "model": "Enhanced 3D Model Generator",
-        "device": "cuda" if torch.cuda.is_available() else "cpu",
-        "simplified_mode": USE_SIMPLIFIED_MODE
+        "model": "Enhanced Depth-Based 3D Model Generator (DPT-Large)",
+        "device": "cuda" if torch.cuda.is_available() else "cpu"
     }), 200

 @app.route('/progress/<job_id>', methods=['GET'])
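The per-vertex bilinear sampling loop added above runs in pure Python, O(resolution^2). An equivalent vectorized formulation (an alternative sketch, not what this commit ships; cv2.resize differs only in edge-alignment convention) would be:

    import cv2
    import numpy as np

    # img_array: (H, W, 3) uint8; resolution: mesh grid size
    sampled = cv2.resize(img_array, (resolution, resolution), interpolation=cv2.INTER_LINEAR)
    alpha = np.full((resolution * resolution, 1), 255, dtype=np.uint8)
    # Rows follow the same vertex_idx = i * resolution + j ordering as the loop
    vertex_colors = np.concatenate([sampled.reshape(-1, 3), alpha], axis=1)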
@@ -845,14 +446,14 @@ def progress(job_id)
            time.sleep(0.5)
            check_count += 1

-            # Check if job is still running
+            # If client hasn't received updates for a while, check if job is still running
            if check_count > 60:  # 30 seconds with no updates
                if 'thread_alive' in job and not job['thread_alive']():
                    job['status'] = 'error'
                    job['error'] = 'Processing thread died unexpectedly'
                    break
                check_count = 0

        # Send final status
        if job['status'] == 'completed':
            yield f"data: {json.dumps({'status': 'completed', 'progress': 100, 'result_url': job['result_url'], 'preview_url': job['preview_url']})}\n\n"
@@ -879,13 +480,7 @@ def convert_image_to_3d()
        mesh_resolution = min(int(request.form.get('mesh_resolution', 100)), 200)  # Limit max resolution
        output_format = request.form.get('output_format', 'obj').lower()
        detail_level = request.form.get('detail_level', 'medium').lower()  # Parameter for detail level
-        model_type = request.form.get('model_type', 'openlrm').lower()  # 'openlrm' or 'depth'
-
-        # Adjust parameters for simplified mode
-        if USE_SIMPLIFIED_MODE:
-            mesh_resolution = min(mesh_resolution, 100)  # Lower resolution for simplified mode
-            if detail_level == 'high':
-                detail_level = 'medium'  # Downgrade detail level in simplified mode
+        texture_quality = request.form.get('texture_quality', 'medium').lower()  # New parameter for texture quality
    except ValueError:
        return jsonify({"error": "Invalid parameter values"}), 400

@@ -893,6 +488,12 @@ def convert_image_to_3d()
    if output_format not in ['obj', 'glb']:
        return jsonify({"error": "Unsupported output format. Use 'obj' or 'glb'"}), 400

+    # Adjust mesh resolution based on detail level
+    if detail_level == 'high':
+        mesh_resolution = min(int(mesh_resolution * 1.5), 200)
+    elif detail_level == 'low':
+        mesh_resolution = max(int(mesh_resolution * 0.7), 50)
+
    # Create a job ID
    job_id = str(uuid.uuid4())
    output_dir = os.path.join(RESULTS_FOLDER, job_id)
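A minimal client call against /convert exercising the new texture_quality field; the field names come from this file, while the host and test image are assumptions:

    import requests

    with open("photo.jpg", "rb") as f:
        resp = requests.post(
            "http://localhost:7860/convert",
            files={"image": f},
            data={"mesh_resolution": "120", "output_format": "glb",
                  "detail_level": "high", "texture_quality": "medium"},
        )
    print(resp.json())  # includes the job id to poll via /progress/<job_id>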
@@ -925,17 +526,58 @@ def convert_image_to_3d()
            image = preprocess_image(filepath)
            processing_jobs[job_id]['progress'] = 10

-            # Process image based on selected model type
-            if model_type == 'depth' or model_type == 'depth-based':
-                # Use depth-based approach
-                mesh = process_depth_based(image, job_id, detail_level, output_format)
-            else:
-                # Default to OpenLRM approach
-                mesh = process_openlrm(image, job_id, detail_level, output_format)
-
-            processing_jobs[job_id]['progress'] = 80
-
-            # Export based on requested format
+            # Load model
+            try:
+                model = load_model()
+                processing_jobs[job_id]['progress'] = 30
+            except Exception as e:
+                processing_jobs[job_id]['status'] = 'error'
+                processing_jobs[job_id]['error'] = f"Error loading model: {str(e)}"
+                return
+
+            # Process image with thread-safe timeout
+            try:
+                def estimate_depth():
+                    # Get depth map
+                    result = model(image)
+                    depth_map = result["depth"]
+
+                    # Convert to numpy array if needed
+                    if isinstance(depth_map, torch.Tensor):
+                        depth_map = depth_map.cpu().numpy()
+                    elif hasattr(depth_map, 'numpy'):
+                        depth_map = depth_map.numpy()
+                    elif isinstance(depth_map, Image.Image):
+                        depth_map = np.array(depth_map)
+
+                    return depth_map
+
+                depth_map, error = process_with_timeout(estimate_depth, [], TIMEOUT_SECONDS)
+
+                if error:
+                    if isinstance(error, TimeoutError):
+                        processing_jobs[job_id]['status'] = 'error'
+                        processing_jobs[job_id]['error'] = f"Processing timed out after {TIMEOUT_SECONDS} seconds"
+                        return
+                    else:
+                        raise error
+
+                processing_jobs[job_id]['progress'] = 60
+
+                # Create mesh from depth map with enhanced detail handling
+                mesh_resolution_int = int(mesh_resolution)
+                mesh = depth_to_mesh(depth_map, image, resolution=mesh_resolution_int, detail_level=detail_level)
+                processing_jobs[job_id]['progress'] = 80
+
+            except Exception as e:
+                error_details = traceback.format_exc()
+                processing_jobs[job_id]['status'] = 'error'
+                processing_jobs[job_id]['error'] = f"Error during processing: {str(e)}"
+                print(f"Error processing job {job_id}: {str(e)}")
+                print(error_details)
+                return
+
+            # Export based on requested format with enhanced quality settings
            try:
                if output_format == 'obj':
                    obj_path = os.path.join(output_dir, "model.obj")
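process_with_timeout itself is outside this hunk; a minimal worker-thread helper consistent with how it is called here (returning a (result, error) pair and surfacing a TimeoutError) might look like:

    import threading

    def process_with_timeout(function, args, timeout):
        result, error = [None], [None]

        def target():
            try:
                result[0] = function(*args)
            except Exception as e:
                error[0] = e

        t = threading.Thread(target=target, daemon=True)
        t.start()
        t.join(timeout)
        if t.is_alive():
            return None, TimeoutError(f"timed out after {timeout}s")
        return result[0], error[0]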
@@ -965,7 +607,7 @@ def convert_image_to_3d()
                    processing_jobs[job_id]['preview_url'] = f"/preview/{job_id}"

                elif output_format == 'glb':
-                    # Export as GLB
+                    # Export as GLB with enhanced settings
                    glb_path = os.path.join(output_dir, "model.glb")
                    mesh.export(
                        glb_path,
@@ -978,7 +620,6 @@ def convert_image_to_3d()
                # Update job status
                processing_jobs[job_id]['status'] = 'completed'
                processing_jobs[job_id]['progress'] = 100
-                processing_jobs[job_id]['completed_at'] = time.time()
                print(f"Job {job_id} completed successfully")
            except Exception as e:
                error_details = traceback.format_exc()
@@ -992,7 +633,9 @@ def convert_image_to_3d()
                os.remove(filepath)

            # Force garbage collection to free memory
-            optimize_memory()
+            gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()

        except Exception as e:
            # Handle errors
@@ -1086,7 +729,7 @@ def cleanup_old_jobs()
    # Schedule the next cleanup
    threading.Timer(300, cleanup_old_jobs).start()  # Run every 5 minutes

-# Get detailed information about a model
+# New endpoint to get detailed information about a model
 @app.route('/model-info/<job_id>', methods=['GET'])
 def model_info(job_id):
    if job_id not in processing_jobs:
@@ -1135,7 +778,7 @@ def model_info(job_id)
 @app.route('/', methods=['GET'])
 def index():
    return jsonify({
-        "message": "Enhanced 3D Model Generator",
+        "message": "Enhanced Image to 3D API (DPT-Large Model)",
        "endpoints": [
            "/convert",
            "/progress/<job_id>",
@@ -1147,54 +790,173 @@ def index()
            "mesh_resolution": "Integer (50-200), controls mesh density",
            "output_format": "obj or glb",
            "detail_level": "low, medium, or high - controls the level of detail in the final model",
-            "model_type": "openlrm (default, full 3D) or depth (faster but simpler)"
+            "texture_quality": "low, medium, or high - controls the quality of textures"
        },
-        "description": "This API creates high-quality 3D models from 2D images with full 3D structure and texturing",
-        "simplified_mode": USE_SIMPLIFIED_MODE
+        "description": "This API creates high-quality 3D models from 2D images with enhanced detail finishing similar to Hunyuan model"
    }), 200

-# System compatibility check function
-def check_system_compatibility():
-    """Check if the system can run the full model or needs simplified mode"""
-    print("Checking system compatibility...")
-
-    # Check available memory
-    try:
-        import psutil
-        mem = psutil.virtual_memory()
-        free_mem_gb = mem.available / (1024 ** 3)
-        print(f"Available memory: {free_mem_gb:.2f} GB")
-    except ImportError:
-        print("psutil not available, cannot check memory")
-        free_mem_gb = 1.0  # Assume low memory
-
-    # Check GPU
-    gpu_available = torch.cuda.is_available()
-    gpu_mem_gb = 0
-    if gpu_available:
-        try:
-            gpu_mem_gb = torch.cuda.get_device_properties(0).total_memory / (1024 ** 3)
-            print(f"GPU available: {gpu_available}, Memory: {gpu_mem_gb:.2f} GB")
-        except Exception as e:
-            print(f"Error checking GPU memory: {e}")
-    else:
-        print("No GPU available")
-
-    # Set simplified mode if limited resources
-    global USE_SIMPLIFIED_MODE
-    if free_mem_gb < 4.0 or (gpu_available and gpu_mem_gb < 2.0):
-        print("Limited resources detected, using simplified mode")
-        USE_SIMPLIFIED_MODE = True
-    else:
-        print("Sufficient resources detected")
-
-if __name__ == '__main__':
-    # Check system compatibility
-    check_system_compatibility()
+# Example endpoint showing how to compare different detail levels
+@app.route('/detail-comparison', methods=['POST'])
+def compare_detail_levels():
+    # Check if image is in the request
+    if 'image' not in request.files:
+        return jsonify({"error": "No image provided"}), 400
+
+    file = request.files['image']
+    if file.filename == '':
+        return jsonify({"error": "No image selected"}), 400
+
+    if not allowed_file(file.filename):
+        return jsonify({"error": f"File type not allowed. Supported types: {', '.join(ALLOWED_EXTENSIONS)}"}), 400
+
+    # Create a job ID
+    job_id = str(uuid.uuid4())
+    output_dir = os.path.join(RESULTS_FOLDER, job_id)
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Save the uploaded file
+    filename = secure_filename(file.filename)
+    filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{job_id}_{filename}")
+    file.save(filepath)
+
+    # Initialize job tracking
+    processing_jobs[job_id] = {
+        'status': 'processing',
+        'progress': 0,
+        'result_url': None,
+        'preview_url': None,
+        'error': None,
+        'output_format': 'glb',  # Use GLB for comparison
+        'created_at': time.time(),
+        'comparison': True
+    }
+
+    # Process in separate thread to create 3 different detail levels
+    def process_comparison():
+        thread = threading.current_thread()
+        processing_jobs[job_id]['thread_alive'] = lambda: thread.is_alive()
+
+        try:
+            # Preprocess image
+            image = preprocess_image(filepath)
+            processing_jobs[job_id]['progress'] = 10
+
+            # Load model
+            try:
+                model = load_model()
+                processing_jobs[job_id]['progress'] = 20
+            except Exception as e:
+                processing_jobs[job_id]['status'] = 'error'
+                processing_jobs[job_id]['error'] = f"Error loading model: {str(e)}"
+                return
+
+            # Process image to get depth map
+            try:
+                depth_map = model(image)["depth"]
+                if isinstance(depth_map, torch.Tensor):
+                    depth_map = depth_map.cpu().numpy()
+                elif hasattr(depth_map, 'numpy'):
+                    depth_map = depth_map.numpy()
+                elif isinstance(depth_map, Image.Image):
+                    depth_map = np.array(depth_map)
+
+                processing_jobs[job_id]['progress'] = 40
+            except Exception as e:
+                processing_jobs[job_id]['status'] = 'error'
+                processing_jobs[job_id]['error'] = f"Error estimating depth: {str(e)}"
+                return
+
+            # Create meshes at different detail levels
+            result_urls = {}
+
+            for detail_level in ['low', 'medium', 'high']:
+                try:
+                    # Update progress
+                    if detail_level == 'low':
+                        processing_jobs[job_id]['progress'] = 50
+                    elif detail_level == 'medium':
+                        processing_jobs[job_id]['progress'] = 70
+                    else:
+                        processing_jobs[job_id]['progress'] = 90
+
+                    # Create mesh with appropriate detail level
+                    mesh_resolution = 100  # Fixed resolution for fair comparison
+                    if detail_level == 'high':
+                        mesh_resolution = 150
+                    elif detail_level == 'low':
+                        mesh_resolution = 80
+
+                    mesh = depth_to_mesh(depth_map, image,
+                                         resolution=mesh_resolution,
+                                         detail_level=detail_level)
+
+                    # Export as GLB
+                    model_path = os.path.join(output_dir, f"model_{detail_level}.glb")
+                    mesh.export(model_path, file_type='glb')
+
+                    # Add to result URLs
+                    result_urls[detail_level] = f"/compare-download/{job_id}/{detail_level}"
+
+                except Exception as e:
+                    print(f"Error processing {detail_level} detail level: {str(e)}")
+                    # Continue with other detail levels even if one fails
+
+            # Update job status
+            processing_jobs[job_id]['status'] = 'completed'
+            processing_jobs[job_id]['progress'] = 100
+            processing_jobs[job_id]['result_urls'] = result_urls
+            processing_jobs[job_id]['completed_at'] = time.time()
+
+            # Clean up temporary file
+            if os.path.exists(filepath):
+                os.remove(filepath)
+
+            # Force garbage collection
+            gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+        except Exception as e:
+            # Handle errors
+            processing_jobs[job_id]['status'] = 'error'
+            processing_jobs[job_id]['error'] = f"Error during processing: {str(e)}"
+
+            # Clean up on error
+            if os.path.exists(filepath):
+                os.remove(filepath)
+
+    # Start processing thread
+    processing_thread = threading.Thread(target=process_comparison)
+    processing_thread.daemon = True
+    processing_thread.start()
+
+    # Return job ID immediately
+    return jsonify({"job_id": job_id, "check_progress_at": f"/progress/{job_id}"}), 202
+
+@app.route('/compare-download/<job_id>/<detail_level>', methods=['GET'])
+def download_comparison_model(job_id, detail_level):
+    if job_id not in processing_jobs or processing_jobs[job_id]['status'] != 'completed':
+        return jsonify({"error": "Model not found or processing not complete"}), 404
+
+    if 'comparison' not in processing_jobs[job_id] or not processing_jobs[job_id]['comparison']:
+        return jsonify({"error": "This is not a comparison job"}), 400
+
+    if detail_level not in ['low', 'medium', 'high']:
+        return jsonify({"error": "Invalid detail level"}), 400
+
+    # Get the output directory for this job
+    output_dir = os.path.join(RESULTS_FOLDER, job_id)
+    model_path = os.path.join(output_dir, f"model_{detail_level}.glb")
+
+    if os.path.exists(model_path):
+        return send_file(model_path, as_attachment=True, download_name=f"model_{detail_level}.glb")
+
+    return jsonify({"error": "File not found"}), 404
+
+if __name__ == '__main__':
    # Start the cleanup thread
    cleanup_old_jobs()

    # Use port 7860 which is standard for Hugging Face Spaces
    port = int(os.environ.get('PORT', 7860))
-    app.run(host='0.0.0.0', port=port)
+    app.run(host='0.0.0.0', port=port)
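End to end, the comparison flow added in this commit can be exercised as follows; a sketch assuming the default port and a completed job:

    import requests

    base = "http://localhost:7860"
    job_id = requests.post(f"{base}/detail-comparison",
                           files={"image": open("photo.jpg", "rb")}).json()["job_id"]
    # ...poll f"{base}/progress/{job_id}" until status is 'completed', then:
    for level in ("low", "medium", "high"):
        glb = requests.get(f"{base}/compare-download/{job_id}/{level}")
        open(f"model_{level}.glb", "wb").write(glb.content)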