Spaces:

Xalphinions
/

watermelon2

Sleeping

App Files Community

Xalphinions committed on Apr 6

Commit

fb889d2

verified ·

1 Parent(s): 945fdb4

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +148 -288

app.py CHANGED Viewed

@@ -6,14 +6,14 @@ import gradio as gr
 import torchaudio
 import torchvision
-# Import Gradio Spaces GPU decorator
-try:
-    from gradio import spaces
-    HAS_SPACES = True
-    print("\033[92mINFO\033[0m: Gradio Spaces detected, GPU acceleration will be enabled")
-except ImportError:
-    HAS_SPACES = False
-    print("\033[93mWARN\033[0m: gradio.spaces not available, running without GPU optimization")
 # Add parent directory to path to import preprocess functions
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -78,291 +78,151 @@ def app_process_audio_data(waveform, sample_rate):
 # Similarly for images, but let's import the original one
 from preprocess import process_image_data
-# Apply GPU decorator directly to the function if available
-if HAS_SPACES:
     # Using the decorator directly on the function definition
-    @spaces.GPU
-    def predict_sweetness(audio, image, model_path):
-        """Function with GPU acceleration"""
-        try:
-            # Now check CUDA availability inside the GPU-decorated function
-            if torch.cuda.is_available():
-                device = torch.device("cuda")
-                print(f"\033[92mINFO\033[0m: CUDA is available. Using device: {device}")
-            else:
-                device = torch.device("cpu")
-                print(f"\033[92mINFO\033[0m: CUDA is not available. Using device: {device}")
-            # Load model inside the function to ensure it's on the correct device
-            model = WatermelonModel().to(device)
-            model.load_state_dict(torch.load(model_path, map_location=device))
-            model.eval()
-            print(f"\033[92mINFO\033[0m: Loaded model from {model_path}")
-            # Debug information about input types
-            print(f"\033[92mDEBUG\033[0m: Audio input type: {type(audio)}")
-            print(f"\033[92mDEBUG\033[0m: Audio input shape/length: {len(audio)}")
-            print(f"\033[92mDEBUG\033[0m: Image input type: {type(image)}")
-            if isinstance(image, np.ndarray):
-                print(f"\033[92mDEBUG\033[0m: Image input shape: {image.shape}")
-            # Handle different audio input formats
-            if isinstance(audio, tuple) and len(audio) == 2:
-                # Standard Gradio format: (sample_rate, audio_data)
-                sample_rate, audio_data = audio
-                print(f"\033[92mDEBUG\033[0m: Audio sample rate: {sample_rate}")
-                print(f"\033[92mDEBUG\033[0m: Audio data shape: {audio_data.shape}")
-            elif isinstance(audio, tuple) and len(audio) > 2:
-                # Sometimes Gradio returns (sample_rate, audio_data, other_info...)
-                sample_rate, audio_data = audio[0], audio[-1]
-                print(f"\033[92mDEBUG\033[0m: Audio sample rate: {sample_rate}")
-                print(f"\033[92mDEBUG\033[0m: Audio data shape: {audio_data.shape}")
-            elif isinstance(audio, str):
-                # Direct path to audio file
-                audio_data, sample_rate = torchaudio.load(audio)
-                print(f"\033[92mDEBUG\033[0m: Loaded audio from path with shape: {audio_data.shape}")
-            else:
-                return f"Error: Unsupported audio format. Got {type(audio)}"
-            # Create a temporary file path for the audio and image
-            temp_dir = "temp"
-            os.makedirs(temp_dir, exist_ok=True)
-            temp_audio_path = os.path.join(temp_dir, "temp_audio.wav")
-            temp_image_path = os.path.join(temp_dir, "temp_image.jpg")
-            # Import necessary libraries
-            from PIL import Image
-            # Audio handling - direct processing from the data in memory
-            if isinstance(audio_data, np.ndarray):
-                # Convert numpy array to tensor
-                print(f"\033[92mDEBUG\033[0m: Converting numpy audio with shape {audio_data.shape} to tensor")
-                audio_tensor = torch.tensor(audio_data).float()
-                # Handle different audio dimensions
-                if audio_data.ndim == 1:
-                    # Single channel audio
-                    audio_tensor = audio_tensor.unsqueeze(0)
-                elif audio_data.ndim == 2:
-                    # Ensure channels are first dimension
-                    if audio_data.shape[0] > audio_data.shape[1]:
-                        # More rows than columns, probably (samples, channels)
-                        audio_tensor = torch.tensor(audio_data.T).float()
-            else:
-                # Already a tensor
-                audio_tensor = audio_data.float()
-            print(f"\033[92mDEBUG\033[0m: Audio tensor shape before processing: {audio_tensor.shape}")
-            # Skip saving/loading and process directly
-            mfcc = app_process_audio_data(audio_tensor, sample_rate)
-            print(f"\033[92mDEBUG\033[0m: MFCC tensor shape after processing: {mfcc.shape if mfcc is not None else None}")
-            # Image handling
-            if isinstance(image, np.ndarray):
-                print(f"\033[92mDEBUG\033[0m: Converting numpy image with shape {image.shape} to PIL")
-                pil_image = Image.fromarray(image)
-                pil_image.save(temp_image_path)
-                print(f"\033[92mDEBUG\033[0m: Saved image to {temp_image_path}")
-            elif isinstance(image, str):
-                # If image is already a path
-                temp_image_path = image
-                print(f"\033[92mDEBUG\033[0m: Using provided image path: {temp_image_path}")
-            else:
-                return f"Error: Unsupported image format. Got {type(image)}"
-            # Process image
-            print(f"\033[92mDEBUG\033[0m: Loading and preprocessing image from {temp_image_path}")
-            image_tensor = torchvision.io.read_image(temp_image_path)
-            print(f"\033[92mDEBUG\033[0m: Loaded image shape: {image_tensor.shape}")
-            image_tensor = image_tensor.float()
-            processed_image = process_image_data(image_tensor)
-            print(f"\033[92mDEBUG\033[0m: Processed image shape: {processed_image.shape if processed_image is not None else None}")
-            # Add batch dimension for inference and move to device
-            if mfcc is not None:
-                mfcc = mfcc.unsqueeze(0).to(device)
-                print(f"\033[92mDEBUG\033[0m: Final MFCC shape with batch dimension: {mfcc.shape}")
-            if processed_image is not None:
-                processed_image = processed_image.unsqueeze(0).to(device)
-                print(f"\033[92mDEBUG\033[0m: Final image shape with batch dimension: {processed_image.shape}")
-            # Run inference
-            print(f"\033[92mDEBUG\033[0m: Running inference on device: {device}")
-            if mfcc is not None and processed_image is not None:
-                with torch.no_grad():
-                    sweetness = model(mfcc, processed_image)
-                    print(f"\033[92mDEBUG\033[0m: Prediction successful: {sweetness.item()}")
             else:
-                return "Error: Failed to process inputs. Please check the debug logs."
-            # Format the result
-            if sweetness is not None:
-                result = f"Predicted Sweetness: {sweetness.item():.2f}/13"
-                # Add a qualitative description
-                if sweetness.item() < 9:
-                    result += "\n\nThis watermelon is not very sweet. You might want to choose another one."
-                elif sweetness.item() < 10:
-                    result += "\n\nThis watermelon has moderate sweetness."
-                elif sweetness.item() < 11:
-                    result += "\n\nThis watermelon is sweet! A good choice."
-                else:
-                    result += "\n\nThis watermelon is very sweet! Excellent choice!"
-                return result
-            else:
-                return "Error: Could not predict sweetness. Please try again with different inputs."
-        except Exception as e:
-            import traceback
-            error_msg = f"Error: {str(e)}\n\n"
-            error_msg += traceback.format_exc()
-            print(f"\033[91mERR!\033[0m: {error_msg}")
-            return error_msg
     print("\033[92mINFO\033[0m: GPU-accelerated prediction function created with @spaces.GPU decorator")
-else:
-    # Regular version without GPU decorator for non-Spaces environments
-    def predict_sweetness(audio, image, model_path):
-        """Predict sweetness of a watermelon from audio and image input"""
-        try:
-            # Check for device - will be CPU in this case
-            device = torch.device("cpu")
-            print(f"\033[92mINFO\033[0m: Using device: {device}")
-            # Load model inside the function
-            model = WatermelonModel().to(device)
-            model.load_state_dict(torch.load(model_path, map_location=device))
-            model.eval()
-            print(f"\033[92mINFO\033[0m: Loaded model from {model_path}")
-            # Rest of function identical - processing code
-            # Debug information about input types
-            print(f"\033[92mDEBUG\033[0m: Audio input type: {type(audio)}")
-            print(f"\033[92mDEBUG\033[0m: Audio input shape/length: {len(audio)}")
-            print(f"\033[92mDEBUG\033[0m: Image input type: {type(image)}")
-            if isinstance(image, np.ndarray):
-                print(f"\033[92mDEBUG\033[0m: Image input shape: {image.shape}")
-            # Handle different audio input formats
-            if isinstance(audio, tuple) and len(audio) == 2:
-                # Standard Gradio format: (sample_rate, audio_data)
-                sample_rate, audio_data = audio
-                print(f"\033[92mDEBUG\033[0m: Audio sample rate: {sample_rate}")
-                print(f"\033[92mDEBUG\033[0m: Audio data shape: {audio_data.shape}")
-            elif isinstance(audio, tuple) and len(audio) > 2:
-                # Sometimes Gradio returns (sample_rate, audio_data, other_info...)
-                sample_rate, audio_data = audio[0], audio[-1]
-                print(f"\033[92mDEBUG\033[0m: Audio sample rate: {sample_rate}")
-                print(f"\033[92mDEBUG\033[0m: Audio data shape: {audio_data.shape}")
-            elif isinstance(audio, str):
-                # Direct path to audio file
-                audio_data, sample_rate = torchaudio.load(audio)
-                print(f"\033[92mDEBUG\033[0m: Loaded audio from path with shape: {audio_data.shape}")
-            else:
-                return f"Error: Unsupported audio format. Got {type(audio)}"
-            # Create a temporary file path for the audio and image
-            temp_dir = "temp"
-            os.makedirs(temp_dir, exist_ok=True)
-            temp_audio_path = os.path.join(temp_dir, "temp_audio.wav")
-            temp_image_path = os.path.join(temp_dir, "temp_image.jpg")
-            # Import necessary libraries
-            from PIL import Image
-            # Audio handling - direct processing from the data in memory
-            if isinstance(audio_data, np.ndarray):
-                # Convert numpy array to tensor
-                print(f"\033[92mDEBUG\033[0m: Converting numpy audio with shape {audio_data.shape} to tensor")
-                audio_tensor = torch.tensor(audio_data).float()
-                # Handle different audio dimensions
-                if audio_data.ndim == 1:
-                    # Single channel audio
-                    audio_tensor = audio_tensor.unsqueeze(0)
-                elif audio_data.ndim == 2:
-                    # Ensure channels are first dimension
-                    if audio_data.shape[0] > audio_data.shape[1]:
-                        # More rows than columns, probably (samples, channels)
-                        audio_tensor = torch.tensor(audio_data.T).float()
-            else:
-                # Already a tensor
-                audio_tensor = audio_data.float()
-            print(f"\033[92mDEBUG\033[0m: Audio tensor shape before processing: {audio_tensor.shape}")
-            # Skip saving/loading and process directly
-            mfcc = app_process_audio_data(audio_tensor, sample_rate)
-            print(f"\033[92mDEBUG\033[0m: MFCC tensor shape after processing: {mfcc.shape if mfcc is not None else None}")
-            # Image handling
-            if isinstance(image, np.ndarray):
-                print(f"\033[92mDEBUG\033[0m: Converting numpy image with shape {image.shape} to PIL")
-                pil_image = Image.fromarray(image)
-                pil_image.save(temp_image_path)
-                print(f"\033[92mDEBUG\033[0m: Saved image to {temp_image_path}")
-            elif isinstance(image, str):
-                # If image is already a path
-                temp_image_path = image
-                print(f"\033[92mDEBUG\033[0m: Using provided image path: {temp_image_path}")
-            else:
-                return f"Error: Unsupported image format. Got {type(image)}"
-            # Process image
-            print(f"\033[92mDEBUG\033[0m: Loading and preprocessing image from {temp_image_path}")
-            image_tensor = torchvision.io.read_image(temp_image_path)
-            print(f"\033[92mDEBUG\033[0m: Loaded image shape: {image_tensor.shape}")
-            image_tensor = image_tensor.float()
-            processed_image = process_image_data(image_tensor)
-            print(f"\033[92mDEBUG\033[0m: Processed image shape: {processed_image.shape if processed_image is not None else None}")
-            # Add batch dimension for inference and move to device
-            if mfcc is not None:
-                mfcc = mfcc.unsqueeze(0).to(device)
-                print(f"\033[92mDEBUG\033[0m: Final MFCC shape with batch dimension: {mfcc.shape}")
-            if processed_image is not None:
-                processed_image = processed_image.unsqueeze(0).to(device)
-                print(f"\033[92mDEBUG\033[0m: Final image shape with batch dimension: {processed_image.shape}")
-            # Run inference
-            print(f"\033[92mDEBUG\033[0m: Running inference on device: {device}")
-            if mfcc is not None and processed_image is not None:
-                with torch.no_grad():
-                    sweetness = model(mfcc, processed_image)
-                    print(f"\033[92mDEBUG\033[0m: Prediction successful: {sweetness.item()}")
-            else:
-                return "Error: Failed to process inputs. Please check the debug logs."
-            # Format the result
-            if sweetness is not None:
-                result = f"Predicted Sweetness: {sweetness.item():.2f}/13"
-                # Add a qualitative description
-                if sweetness.item() < 9:
-                    result += "\n\nThis watermelon is not very sweet. You might want to choose another one."
-                elif sweetness.item() < 10:
-                    result += "\n\nThis watermelon has moderate sweetness."
-                elif sweetness.item() < 11:
-                    result += "\n\nThis watermelon is sweet! A good choice."
-                else:
-                    result += "\n\nThis watermelon is very sweet! Excellent choice!"
-                return result
-            else:
-                return "Error: Could not predict sweetness. Please try again with different inputs."
-        except Exception as e:
-            import traceback
-            error_msg = f"Error: {str(e)}\n\n"
-            error_msg += traceback.format_exc()
-            print(f"\033[91mERR!\033[0m: {error_msg}")
-            return error_msg
 def create_app(model_path):
     """Create and launch the Gradio interface"""

 import torchaudio
 import torchvision
+# # Import Gradio Spaces GPU decorator
+# try:
+#     from gradio import spaces
+#     HAS_SPACES = True
+#     print("\033[92mINFO\033[0m: Gradio Spaces detected, GPU acceleration will be enabled")
+# except ImportError:
+#     HAS_SPACES = False
+#     print("\033[93mWARN\033[0m: gradio.spaces not available, running without GPU optimization")
 # Add parent directory to path to import preprocess functions
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 # Similarly for images, but let's import the original one
 from preprocess import process_image_data
     # Using the decorator directly on the function definition
+@spaces.GPU
+def predict_sweetness(audio, image, model_path):
+    """Function with GPU acceleration"""
+    try:
+        # Now check CUDA availability inside the GPU-decorated function
+        if torch.cuda.is_available():
+            device = torch.device("cuda")
+            print(f"\033[92mINFO\033[0m: CUDA is available. Using device: {device}")
+        else:
+            device = torch.device("cpu")
+            print(f"\033[92mINFO\033[0m: CUDA is not available. Using device: {device}")
+        # Load model inside the function to ensure it's on the correct device
+        model = WatermelonModel().to(device)
+        model.load_state_dict(torch.load(model_path, map_location=device))
+        model.eval()
+        print(f"\033[92mINFO\033[0m: Loaded model from {model_path}")
+        # Debug information about input types
+        print(f"\033[92mDEBUG\033[0m: Audio input type: {type(audio)}")
+        print(f"\033[92mDEBUG\033[0m: Audio input shape/length: {len(audio)}")
+        print(f"\033[92mDEBUG\033[0m: Image input type: {type(image)}")
+        if isinstance(image, np.ndarray):
+            print(f"\033[92mDEBUG\033[0m: Image input shape: {image.shape}")
+        # Handle different audio input formats
+        if isinstance(audio, tuple) and len(audio) == 2:
+            # Standard Gradio format: (sample_rate, audio_data)
+            sample_rate, audio_data = audio
+            print(f"\033[92mDEBUG\033[0m: Audio sample rate: {sample_rate}")
+            print(f"\033[92mDEBUG\033[0m: Audio data shape: {audio_data.shape}")
+        elif isinstance(audio, tuple) and len(audio) > 2:
+            # Sometimes Gradio returns (sample_rate, audio_data, other_info...)
+            sample_rate, audio_data = audio[0], audio[-1]
+            print(f"\033[92mDEBUG\033[0m: Audio sample rate: {sample_rate}")
+            print(f"\033[92mDEBUG\033[0m: Audio data shape: {audio_data.shape}")
+        elif isinstance(audio, str):
+            # Direct path to audio file
+            audio_data, sample_rate = torchaudio.load(audio)
+            print(f"\033[92mDEBUG\033[0m: Loaded audio from path with shape: {audio_data.shape}")
+        else:
+            return f"Error: Unsupported audio format. Got {type(audio)}"
+        # Create a temporary file path for the audio and image
+        temp_dir = "temp"
+        os.makedirs(temp_dir, exist_ok=True)
+        temp_audio_path = os.path.join(temp_dir, "temp_audio.wav")
+        temp_image_path = os.path.join(temp_dir, "temp_image.jpg")
+        # Import necessary libraries
+        from PIL import Image
+        # Audio handling - direct processing from the data in memory
+        if isinstance(audio_data, np.ndarray):
+            # Convert numpy array to tensor
+            print(f"\033[92mDEBUG\033[0m: Converting numpy audio with shape {audio_data.shape} to tensor")
+            audio_tensor = torch.tensor(audio_data).float()
+            # Handle different audio dimensions
+            if audio_data.ndim == 1:
+                # Single channel audio
+                audio_tensor = audio_tensor.unsqueeze(0)
+            elif audio_data.ndim == 2:
+                # Ensure channels are first dimension
+                if audio_data.shape[0] > audio_data.shape[1]:
+                    # More rows than columns, probably (samples, channels)
+                    audio_tensor = torch.tensor(audio_data.T).float()
+        else:
+            # Already a tensor
+            audio_tensor = audio_data.float()
+        print(f"\033[92mDEBUG\033[0m: Audio tensor shape before processing: {audio_tensor.shape}")
+        # Skip saving/loading and process directly
+        mfcc = app_process_audio_data(audio_tensor, sample_rate)
+        print(f"\033[92mDEBUG\033[0m: MFCC tensor shape after processing: {mfcc.shape if mfcc is not None else None}")
+        # Image handling
+        if isinstance(image, np.ndarray):
+            print(f"\033[92mDEBUG\033[0m: Converting numpy image with shape {image.shape} to PIL")
+            pil_image = Image.fromarray(image)
+            pil_image.save(temp_image_path)
+            print(f"\033[92mDEBUG\033[0m: Saved image to {temp_image_path}")
+        elif isinstance(image, str):
+            # If image is already a path
+            temp_image_path = image
+            print(f"\033[92mDEBUG\033[0m: Using provided image path: {temp_image_path}")
+        else:
+            return f"Error: Unsupported image format. Got {type(image)}"
+        # Process image
+        print(f"\033[92mDEBUG\033[0m: Loading and preprocessing image from {temp_image_path}")
+        image_tensor = torchvision.io.read_image(temp_image_path)
+        print(f"\033[92mDEBUG\033[0m: Loaded image shape: {image_tensor.shape}")
+        image_tensor = image_tensor.float()
+        processed_image = process_image_data(image_tensor)
+        print(f"\033[92mDEBUG\033[0m: Processed image shape: {processed_image.shape if processed_image is not None else None}")
+        # Add batch dimension for inference and move to device
+        if mfcc is not None:
+            mfcc = mfcc.unsqueeze(0).to(device)
+            print(f"\033[92mDEBUG\033[0m: Final MFCC shape with batch dimension: {mfcc.shape}")
+        if processed_image is not None:
+            processed_image = processed_image.unsqueeze(0).to(device)
+            print(f"\033[92mDEBUG\033[0m: Final image shape with batch dimension: {processed_image.shape}")
+        # Run inference
+        print(f"\033[92mDEBUG\033[0m: Running inference on device: {device}")
+        if mfcc is not None and processed_image is not None:
+            with torch.no_grad():
+                sweetness = model(mfcc, processed_image)
+                print(f"\033[92mDEBUG\033[0m: Prediction successful: {sweetness.item()}")
+        else:
+            return "Error: Failed to process inputs. Please check the debug logs."
+        # Format the result
+        if sweetness is not None:
+            result = f"Predicted Sweetness: {sweetness.item():.2f}/13"
+            # Add a qualitative description
+            if sweetness.item() < 9:
+                result += "\n\nThis watermelon is not very sweet. You might want to choose another one."
+            elif sweetness.item() < 10:
+                result += "\n\nThis watermelon has moderate sweetness."
+            elif sweetness.item() < 11:
+                result += "\n\nThis watermelon is sweet! A good choice."
             else:
+                result += "\n\nThis watermelon is very sweet! Excellent choice!"
+            return result
+        else:
+            return "Error: Could not predict sweetness. Please try again with different inputs."
+    except Exception as e:
+        import traceback
+        error_msg = f"Error: {str(e)}\n\n"
+        error_msg += traceback.format_exc()
+        print(f"\033[91mERR!\033[0m: {error_msg}")
+        return error_msg
     print("\033[92mINFO\033[0m: GPU-accelerated prediction function created with @spaces.GPU decorator")
 def create_app(model_path):
     """Create and launch the Gradio interface"""