Spaces: Sleeping
IlayMalinyak committed · Commit 49ebc1f · 1 Parent(s): 707b3a3
kan
Browse files
- model/0.0_cache_data +0 -0
- model/0.0_config.yml +0 -0
- model/history.txt +2 -0
- tasks/audio.py +15 -10
- tasks/models/frugal_2025-01-21/CNNEncoder_frugal_2.json +0 -0
- tasks/models/frugal_2025-01-21/frugal_kan_2.pth +3 -0
- tasks/run.py +95 -0
- tasks/utils/config.yaml +18 -11
- tasks/utils/data.py +11 -5
- tasks/utils/kan/__init__.py +1 -0
- tasks/utils/kan/fasterkan.py +135 -0
- tasks/utils/kan/fasterkan_basis.py +112 -0
- tasks/utils/kan/fasterkan_layers.py +301 -0
- tasks/utils/kan/feature_extractor.py +112 -0
- tasks/utils/models.py +28 -1
- tasks/utils/train.py +12 -9
model/0.0_cache_data
ADDED
Binary file (840 Bytes).
model/0.0_config.yml
ADDED
The diff for this file is too large to render. See raw diff.
model/history.txt
ADDED
```text
### Round 0 ###
init => 0.0
```
tasks/audio.py
CHANGED
```diff
@@ -10,7 +10,7 @@ from torch.utils.data import DataLoader
 from .utils.evaluation import AudioEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 from .utils.data import FFTDataset
-from .utils.models import DualEncoder
+from .utils.models import DualEncoder, CNNKan
 from .utils.train import Trainer
 from .utils.data_utils import collate_fn, Container
 import yaml
@@ -70,13 +70,14 @@ async def evaluate_audio(request: AudioEvaluationRequest):
     model_args = Container(**yaml.safe_load(open(args_path, 'r'))['CNNEncoder'])
     model_args_f = Container(**yaml.safe_load(open(args_path, 'r'))['CNNEncoder_f'])
     conformer_args = Container(**yaml.safe_load(open(args_path, 'r'))['Conformer'])
+    kan_args = Container(**yaml.safe_load(open(args_path, 'r'))['KAN'])

     test_dataset = FFTDataset(test_dataset)
     test_dl = DataLoader(test_dataset, batch_size=data_args.batch_size, collate_fn=collate_fn)

-    model =
+    model = CNNKan(model_args, conformer_args, kan_args.get_dict())
     model = model.to(device)
-    state_dict = torch.load(
+    state_dict = torch.load(data_args.checkpoint_path)
     new_state_dict = OrderedDict()
     for key, value in state_dict.items():
         if key.startswith('module.'):
@@ -95,8 +96,12 @@ async def evaluate_audio(request: AudioEvaluationRequest):
                           accumulation_step=1, max_iter=np.inf,
                           exp_name=f"frugal_cnnencoder_inference")
     predictions, true_labels, acc = trainer.predict(test_dl, device=device)
+    # true_labels = test_dataset["label"]
+
     # Make random predictions (placeholder for actual model inference)
     print("accuracy: ", acc)
+    print("predictions: ", len(predictions))
+    print("true_labels: ", len(true_labels))

     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
@@ -128,15 +133,15 @@ async def evaluate_audio(request: AudioEvaluationRequest):

     return results

-
+if __name__ == "__main__":
     # with open("../logs//token.txt", "r") as f:
     #     api_key = f.read()
     # login(api_key)
     # # Create a sample request object
-
-
-
-
-
+    sample_request = AudioEvaluationRequest(
+        dataset_name="rfcx/frugalai",  # Replace with actual dataset name
+        test_size=0.2,  # Example values
+        test_seed=42
+    )
     #
-
+    asyncio.run(evaluate_audio(sample_request))
```
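The checkpoint loading added above follows the standard recipe for loading weights saved from a `DataParallel`/DDP-wrapped model into an unwrapped one: keys arrive prefixed with `module.`, and the loop strips that prefix. The diff truncates the loop body, so the renaming line in this sketch is an assumption; the surrounding pattern is the usual one:

```python
from collections import OrderedDict
import torch

# Sketch: load a checkpoint saved from a DataParallel/DDP model into a plain
# model by dropping the 'module.' name prefix. The checkpoint path is
# illustrative; the key-renaming line is an assumption (the diff cuts off here).
state_dict = torch.load("frugal_kan_2.pth", map_location="cpu")
new_state_dict = OrderedDict()
for key, value in state_dict.items():
    if key.startswith("module."):
        new_state_dict[key[len("module."):]] = value  # assumed rename
    else:
        new_state_dict[key] = value
# model.load_state_dict(new_state_dict)
```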
tasks/models/frugal_2025-01-21/CNNEncoder_frugal_2.json
ADDED
The diff for this file is too large to render. See raw diff.
tasks/models/frugal_2025-01-21/frugal_kan_2.pth
ADDED
```text
version https://git-lfs.github.com/spec/v1
oid sha256:28e0188edab4879996cc960d2dc79641460b270af9c5ac7d3eacad1f5e96da39
size 1714830
```
tasks/run.py
ADDED
```python
from torch.utils.data import DataLoader
from .utils.data import FFTDataset, SplitDataset
from datasets import load_dataset
from .utils.train import Trainer
from .utils.models import CNNKan, KanEncoder
from .utils.data_utils import *
from huggingface_hub import login
import yaml
import datetime
import json
import numpy as np

# local_rank = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
current_date = datetime.date.today().strftime("%Y-%m-%d")
datetime_dir = f"frugal_{current_date}"
args_dir = 'tasks/utils/config.yaml'
data_args = Container(**yaml.safe_load(open(args_dir, 'r'))['Data'])
exp_num = data_args.exp_num
model_name = data_args.model_name
model_args = Container(**yaml.safe_load(open(args_dir, 'r'))['CNNEncoder'])
model_args_f = Container(**yaml.safe_load(open(args_dir, 'r'))['CNNEncoder_f'])
conformer_args = Container(**yaml.safe_load(open(args_dir, 'r'))['Conformer'])
kan_args = Container(**yaml.safe_load(open(args_dir, 'r'))['KAN'])
if not os.path.exists(f"{data_args.log_dir}/{datetime_dir}"):
    os.makedirs(f"{data_args.log_dir}/{datetime_dir}")

with open("../logs//token.txt", "r") as f:
    api_key = f.read()

# local_rank, world_size, gpus_per_node = setup()
local_rank = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
login(api_key)
dataset = load_dataset("rfcx/frugalai", streaming=True)

train_ds = SplitDataset(FFTDataset(dataset["train"]), is_train=True)
train_dl = DataLoader(train_ds, batch_size=data_args.batch_size, collate_fn=collate_fn)

val_ds = SplitDataset(FFTDataset(dataset["train"]), is_train=False)
val_dl = DataLoader(val_ds, batch_size=data_args.batch_size, collate_fn=collate_fn)

test_ds = FFTDataset(dataset["test"])
test_dl = DataLoader(test_ds, batch_size=data_args.batch_size, collate_fn=collate_fn)

# for i, batch in enumerate(train_dl):
#     x, x_f, y = batch['audio']['array'], batch['audio']['fft'], batch['label']
#     print(x.shape, x_f.shape, y.shape)
#     if i > 10:
#         break
# exit()

# model = DualEncoder(model_args, model_args_f, conformer_args)
# model = FasterKAN([18000,64,64,16,1])
model = CNNKan(model_args, conformer_args, kan_args.get_dict())
# model.kan.speed()
# model = KanEncoder(kan_args.get_dict())
model = model.to(local_rank)
# model = DDP(model, device_ids=[local_rank], output_device=local_rank)
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Number of parameters: {num_params}")

loss_fn = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
total_steps = int(data_args.num_epochs) * 1000
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                       T_max=total_steps,
                                                       eta_min=float((5e-4)/10))

# missing, unexpected = model.load_state_dict(torch.load(model_args.checkpoint_path))
# print(f"Missing keys: {missing}")
# print(f"Unexpected keys: {unexpected}")

trainer = Trainer(model=model, optimizer=optimizer,
                  criterion=loss_fn, output_dim=model_args.output_dim, scaler=None,
                  scheduler=None, train_dataloader=train_dl,
                  val_dataloader=val_dl, device=local_rank,
                  exp_num=datetime_dir, log_path=data_args.log_dir,
                  range_update=None,
                  accumulation_step=1, max_iter=np.inf,
                  exp_name=f"frugal_kan_{exp_num}")
fit_res = trainer.fit(num_epochs=100, device=local_rank,
                      early_stopping=10, only_p=False, best='loss', conf=True)
output_filename = f'{data_args.log_dir}/{datetime_dir}/{model_name}_frugal_{exp_num}.json'
with open(output_filename, "w") as f:
    json.dump(fit_res, f, indent=2)
preds, acc = trainer.predict(test_dl, local_rank)
print(f"Accuracy: {acc}")
```
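Note that `torch`, `os`, `Container`, and `collate_fn` are used here without direct imports; they presumably arrive via the star import from `.utils.data_utils`. Both the train and validation loaders wrap the same streaming `dataset["train"]` with `SplitDataset(..., is_train=...)`. `SplitDataset` lives in `tasks/utils/data.py` but its body is untouched by (and not visible in) this diff, so the following is only a hypothetical sketch of a deterministic split over a streaming `IterableDataset`; the constructor matches the call sites, while the 1-in-10 routing rule is an illustrative assumption:

```python
from torch.utils.data import IterableDataset

class SplitDataset(IterableDataset):
    """Hypothetical sketch only: the real SplitDataset is not shown in this
    commit. Routes every k-th streamed item to validation, the rest to train."""
    def __init__(self, dataset, is_train=True, every=10):
        self.dataset = dataset
        self.is_train = is_train
        self.every = every  # assumed split ratio (1 in `every` goes to val)

    def __iter__(self):
        for i, item in enumerate(self.dataset):
            in_val = (i % self.every == 0)  # deterministic split over a stream
            if self.is_train and not in_val:
                yield item
            elif not self.is_train and in_val:
                yield item
```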
tasks/utils/config.yaml
CHANGED
```diff
@@ -1,34 +1,41 @@
 Data:
   # Basics
-  log_dir: '/
+  log_dir: 'tasks/models'
   # Data
-  dataset: "
+  dataset: "FFTDataset"
-  data_dir:
+  data_dir: None
   model_name: "CNNEncoder"
-  batch_size:
+  batch_size: 32
-  num_epochs:
+  num_epochs: 10
   exp_num: 2
   max_len_spectra: 4096
   max_days_lc: 270
   lc_freq: 0.0208
   create_umap: True
+  checkpoint_path: 'tasks/models/frugal_2025-01-21/frugal_kan_2.pth'

 CNNEncoder:
   # Model
-  in_channels:
+  in_channels: 2
   num_layers: 4
   stride: 1
-  encoder_dims: [32,64,128
+  encoder_dims: [32,64,128]
   kernel_size: 3
   dropout_p: 0.3
   output_dim: 2
   beta: 1
-  load_checkpoint:
+  load_checkpoint: False
   checkpoint_num: 1
   activation: "silu"
   sine_w0: 1.0
-  avg_output:
+  avg_output: False
-
+
+KAN:
+  layers_hidden: [1125,32,8,8,1]
+  grid_min: -1.2
+  grid_max: 1.2
+  num_grids: 8
+  exponent: 2

 CNNEncoder_f:
   # Model
@@ -50,7 +57,7 @@ CNNEncoder_f:
 Conformer:
   encoder: ["mhsa_pro", "conv"]
   timeshift: false
-  num_layers:
+  num_layers: 4
   encoder_dim: 128
   num_heads: 8
   kernel_size: 3
```
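Each top-level YAML section is loaded independently and splatted into a `Container` (see the call sites in `run.py` and `audio.py`). `Container`'s definition is not part of this commit, so the class body below is an assumed minimal sketch consistent with how it is used: keyword-argument construction, attribute access, and a `get_dict()` helper:

```python
import yaml

class Container:
    """Hypothetical minimal Container matching the usage in this commit
    (the real implementation lives in tasks/utils/data_utils.py)."""
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def get_dict(self):
        return dict(self.__dict__)

# One Container per YAML section:
kan_args = Container(**yaml.safe_load(open('tasks/utils/config.yaml', 'r'))['KAN'])
print(kan_args.layers_hidden)  # [1125, 32, 8, 8, 1]
print(kan_args.get_dict())     # kwargs passed straight into FasterKAN(**...)
```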
tasks/utils/data.py
CHANGED
```diff
@@ -1,6 +1,7 @@
 import torch
 from torch.utils.data import IterableDataset
 from torch.fft import fft
+import torch.nn.functional as F
 from itertools import tee
 import random
 import torchaudio.transforms as T
@@ -24,20 +25,25 @@ class SplitDataset(IterableDataset):


 class FFTDataset(IterableDataset):
-    def __init__(self, original_dataset, orig_sample_rate=12000, target_sample_rate=
+    def __init__(self, original_dataset, max_len=72000, orig_sample_rate=12000, target_sample_rate=3000):
         self.dataset = original_dataset
         self.resampler = T.Resample(orig_freq=orig_sample_rate, new_freq=target_sample_rate)
+        self.max_len = max_len

     def __iter__(self):
         for item in self.dataset:
             # Assuming your audio data is in item['audio']
             # Modify this based on your actual data structure
             audio_data = torch.tensor(item['audio']['array']).float()
-
-
-
-
+            # pad audio
+            # if len(audio_data) == 0:
+            #     continue
+            pad_len = self.max_len - len(audio_data)
+            audio_data = F.pad(audio_data, (0, pad_len), mode='constant')
+            audio_data = self.resampler(audio_data)
+            fft_data = fft(audio_data)

             # Update the item with FFT data
             item['audio']['fft'] = fft_data
+            item['audio']['array'] = audio_data
             yield item
```
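For reference, the numbers in this pipeline are mutually consistent: a 6-second clip at 12 kHz has 72 000 samples (`max_len`), resampling to 3 kHz leaves 18 000 samples, and `fft` over that signal returns 18 000 complex bins, matching the commented-out `FasterKAN([18000,64,64,16,1])` alternative in `run.py`. A quick check:

```python
import torch
import torch.nn.functional as F
import torchaudio.transforms as T
from torch.fft import fft

max_len, orig_sr, target_sr = 72000, 12000, 3000
audio = torch.randn(50000)                        # a clip shorter than max_len
audio = F.pad(audio, (0, max_len - len(audio)))   # -> 72000 samples (6 s at 12 kHz)
audio = T.Resample(orig_freq=orig_sr, new_freq=target_sr)(audio)
print(audio.shape)       # torch.Size([18000])
print(fft(audio).shape)  # torch.Size([18000]), complex-valued bins
```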
tasks/utils/kan/__init__.py
ADDED
```python
from .fasterkan import FasterKAN, FasterKANLayer, FasterKANvolver
```
tasks/utils/kan/fasterkan.py
ADDED
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from typing import *
from torch.autograd import Function
from .feature_extractor import EnhancedFeatureExtractor
from .fasterkan_layers import FasterKANLayer

class FasterKAN(nn.Module):
    def __init__(
        self,
        layers_hidden: List[int],
        grid_min: float = -1.2,
        grid_max: float = 1.2,
        num_grids: int = 8,
        exponent: int = 2,
        inv_denominator: float = 0.5,
        train_grid: bool = False,
        train_inv_denominator: bool = False,
        # use_base_update: bool = True,
        base_activation = None,
        spline_weight_init_scale: float = 1.0,
    ) -> None:
        super().__init__()
        self.layers = nn.ModuleList([
            FasterKANLayer(
                in_dim, out_dim,
                grid_min=grid_min,
                grid_max=grid_max,
                num_grids=num_grids,
                exponent=exponent,
                inv_denominator=inv_denominator,
                train_grid=train_grid,
                train_inv_denominator=train_inv_denominator,
                # use_base_update=use_base_update,
                base_activation=base_activation,
                spline_weight_init_scale=spline_weight_init_scale,
            ) for in_dim, out_dim in zip(layers_hidden[:-1], layers_hidden[1:])
        ])

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

class FasterKANvolver(nn.Module):
    def __init__(
        self,
        layers_hidden: List[int],
        grid_min: float = -1.2,
        grid_max: float = 0.2,
        num_grids: int = 8,
        exponent: int = 2,
        inv_denominator: float = 0.5,
        train_grid: bool = False,
        train_inv_denominator: bool = False,
        # use_base_update: bool = True,
        base_activation = None,
        spline_weight_init_scale: float = 1.0,
        view = [-1, 1, 28, 28],
    ) -> None:
        super(FasterKANvolver, self).__init__()

        self.view = view
        # Feature extractor with convolutional layers
        self.feature_extractor = EnhancedFeatureExtractor(colors=view[1])

        # Flattened feature size after the convolutional layers
        flat_features = 256  # 256 channels after global average pooling

        # Update layers_hidden with the correct input size from the conv layers
        layers_hidden = [flat_features] + layers_hidden

        # Define the FasterKAN layers
        self.faster_kan_layers = nn.ModuleList([
            FasterKANLayer(
                in_dim, out_dim,
                grid_min=grid_min,
                grid_max=grid_max,
                num_grids=num_grids,
                exponent=exponent,
                inv_denominator=0.5,
                train_grid=False,
                train_inv_denominator=False,
                # use_base_update=use_base_update,
                base_activation=base_activation,
                spline_weight_init_scale=spline_weight_init_scale,
            ) for in_dim, out_dim in zip(layers_hidden[:-1], layers_hidden[1:])
        ])

    def forward(self, x):
        # Reshape flat input to an image batch, e.g. [batch_size, 1, 28, 28] for MNIST
        x = x.view(self.view[0], self.view[1], self.view[2], self.view[3])
        # Apply the convolutional feature extractor
        x = self.feature_extractor(x)
        x = x.view(x.size(0), -1)  # Flatten the output from the conv layers

        # Pass through FasterKAN layers
        for layer in self.faster_kan_layers:
            x = layer(x)

        return x
```
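A quick smoke test of this stack as configured in `config.yaml` (the `KAN` section maps directly onto `FasterKAN`'s keyword arguments; the import path assumes the repo root is on `PYTHONPATH`):

```python
import torch
from tasks.utils.kan.fasterkan import FasterKAN

# Mirrors the KAN section of tasks/utils/config.yaml
kan = FasterKAN(layers_hidden=[1125, 32, 8, 8, 1],
                grid_min=-1.2, grid_max=1.2, num_grids=8, exponent=2)
x = torch.randn(4, 1125)  # a batch of 4 pooled feature vectors
print(kan(x).shape)       # torch.Size([4, 1]): one logit per sample
```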
tasks/utils/kan/fasterkan_basis.py
ADDED
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from typing import *
from torch.autograd import Function

class RSWAFFunction(Function):
    @staticmethod
    def forward(ctx, input, grid, inv_denominator, train_grid, train_inv_denominator):
        # Compute the forward pass
        diff = (input[..., None] - grid)
        diff_mul = diff.mul(inv_denominator)
        tanh_diff = torch.tanh(diff)
        tanh_diff_deriviative = -tanh_diff.mul(tanh_diff) + 1  # sech^2(x) = 1 - tanh^2(x)

        # Save tensors for backward pass
        ctx.save_for_backward(input, tanh_diff, tanh_diff_deriviative, diff, inv_denominator)
        ctx.train_grid = train_grid
        ctx.train_inv_denominator = train_inv_denominator

        return tanh_diff_deriviative

    ##### SOS NOT SURE HOW grad_inv_denominator, grad_grid ARE CALCULATED CORRECTLY YET
    ##### MUST CHECK https://github.com/pytorch/pytorch/issues/74802
    ##### MUST CHECK https://www.changjiangcai.com/studynotes/2020-10-18-Custom-Function-Extending-PyTorch/
    ##### MUST CHECK https://pytorch.org/tutorials/intermediate/custom_function_double_backward_tutorial.html
    ##### MUST CHECK https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html
    ##### MUST CHECK https://gist.github.com/Hanrui-Wang/bf225dc0ccb91cdce160539c0acc853a

    @staticmethod
    def backward(ctx, grad_output):
        # Retrieve saved tensors
        input, tanh_diff, tanh_diff_deriviative, diff, inv_denominator = ctx.saved_tensors
        grad_grid = None
        grad_inv_denominator = None

        # Compute the backward pass for the input
        grad_input = -2 * tanh_diff * tanh_diff_deriviative * grad_output
        grad_input = grad_input.sum(dim=-1).mul(inv_denominator)

        # Compute the backward pass for grid
        if ctx.train_grid:
            grad_grid = -inv_denominator * grad_output.sum(dim=0).sum(dim=0)  # -(inv_denominator * grad_output * tanh_diff_deriviative).sum(dim=0)

        # Compute the backward pass for inv_denominator
        if ctx.train_inv_denominator:
            grad_inv_denominator = (grad_output * diff).sum()  # (grad_output * diff * tanh_diff_deriviative).sum()

        return grad_input, grad_grid, grad_inv_denominator, None, None  # same number as tensors or parameters


class ReflectionalSwitchFunction(nn.Module):
    def __init__(
        self,
        grid_min: float = -1.2,
        grid_max: float = 0.2,
        num_grids: int = 8,
        exponent: int = 2,
        inv_denominator: float = 0.5,
        train_grid: bool = False,
        train_inv_denominator: bool = False,
    ):
        super().__init__()
        grid = torch.linspace(grid_min, grid_max, num_grids)
        self.train_grid = torch.tensor(train_grid, dtype=torch.bool)
        self.train_inv_denominator = torch.tensor(train_inv_denominator, dtype=torch.bool)
        self.grid = torch.nn.Parameter(grid, requires_grad=train_grid)
        self.inv_denominator = torch.nn.Parameter(torch.tensor(inv_denominator, dtype=torch.float32), requires_grad=train_inv_denominator)  # Cache the inverse of the denominator

    def forward(self, x):
        return RSWAFFunction.apply(x, self.grid, self.inv_denominator, self.train_grid, self.train_inv_denominator)


class SplineLinear(nn.Linear):
    def __init__(self, in_features: int, out_features: int, init_scale: float = 0.1, **kw) -> None:
        self.init_scale = init_scale
        super().__init__(in_features, out_features, bias=False, **kw)

    def reset_parameters(self) -> None:
        nn.init.xavier_uniform_(self.weight)  # Using Xavier Uniform initialization
```
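In math terms, `RSWAFFunction.forward` evaluates each input against every grid point with the reflectional-switch basis; the `# sech^2` comment is exactly this identity, and the input gradient returned by `backward` is its derivative per grid point (summed over grid points and scaled by `inv_denominator`):

```latex
b_i(x) \;=\; 1 - \tanh^{2}(x - g_i) \;=\; \operatorname{sech}^{2}(x - g_i),
\qquad
\frac{\partial b_i}{\partial x} \;=\; -2\,\tanh(x - g_i)\,\operatorname{sech}^{2}(x - g_i).
```

One detail worth flagging: `diff_mul = diff.mul(inv_denominator)` is computed but never used in the forward pass, while `backward` does multiply the summed input gradient by `inv_denominator`, so the denominator scaling enters only through the gradient here.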
tasks/utils/kan/fasterkan_layers.py
ADDED
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from typing import *
from torch.autograd import Function
from .fasterkan_basis import ReflectionalSwitchFunction, SplineLinear

class FasterKANLayer(nn.Module):
    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        grid_min: float = -1.2,
        grid_max: float = 0.2,
        num_grids: int = 8,
        exponent: int = 2,
        inv_denominator: float = 0.5,
        train_grid: bool = False,
        train_inv_denominator: bool = False,
        # use_base_update: bool = True,
        base_activation = F.silu,
        spline_weight_init_scale: float = 0.667,
    ) -> None:
        super().__init__()
        self.layernorm = nn.LayerNorm(input_dim)
        self.rbf = ReflectionalSwitchFunction(grid_min, grid_max, num_grids, exponent, inv_denominator, train_grid, train_inv_denominator)
        self.spline_linear = SplineLinear(input_dim * num_grids, output_dim, spline_weight_init_scale)
        # self.use_base_update = use_base_update
        # if use_base_update:
        #     self.base_activation = base_activation
        #     self.base_linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        x = self.layernorm(x)
        spline_basis = self.rbf(x).view(x.shape[0], -1)
        ret = self.spline_linear(spline_basis)
        # if self.use_base_update:
        #     base = self.base_linear(self.base_activation(x))
        #     ret += base
        return ret


class FasterKAN(nn.Module):
    def __init__(
        self,
        layers_hidden: List[int],
        grid_min: float = -1.2,
        grid_max: float = 0.2,
        num_grids: int = 8,
        exponent: int = 2,
        inv_denominator: float = 0.5,
        train_grid: bool = False,
        train_inv_denominator: bool = False,
        # use_base_update: bool = True,
        base_activation = None,
        spline_weight_init_scale: float = 1.0,
    ) -> None:
        super().__init__()
        self.layers = nn.ModuleList([
            FasterKANLayer(
                in_dim, out_dim,
                grid_min=grid_min,
                grid_max=grid_max,
                num_grids=num_grids,
                exponent=exponent,
                inv_denominator=inv_denominator,
                train_grid=train_grid,
                train_inv_denominator=train_inv_denominator,
                # use_base_update=use_base_update,
                base_activation=base_activation,
                spline_weight_init_scale=spline_weight_init_scale,
            ) for in_dim, out_dim in zip(layers_hidden[:-1], layers_hidden[1:])
        ])

    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x


class BasicResBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicResBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.downsample = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = self.downsample(x)

        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity
        out = F.relu(out)

        return out

class SEBlock(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SEBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)


class DepthwiseSeparableConv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(DepthwiseSeparableConv, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size,
                                   stride=stride, padding=padding, groups=in_channels)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        return x

class SelfAttention(nn.Module):
    def __init__(self, in_channels):
        super(SelfAttention, self).__init__()
        self.query_conv = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1)
        self.key_conv = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1)
        self.value_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        batch_size, C, width, height = x.size()
        proj_query = self.query_conv(x).view(batch_size, -1, width * height).permute(0, 2, 1)
        proj_key = self.key_conv(x).view(batch_size, -1, width * height)
        energy = torch.bmm(proj_query, proj_key)
        attention = F.softmax(energy, dim=-1)
        proj_value = self.value_conv(x).view(batch_size, -1, width * height)
        out = torch.bmm(proj_value, attention.permute(0, 2, 1))
        out = out.view(batch_size, C, width, height)
        out = self.gamma * out + x
        return out

class EnhancedFeatureExtractor(nn.Module):
    def __init__(self):
        super(EnhancedFeatureExtractor, self).__init__()
        self.initial_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # Increased number of filters
            nn.ReLU(),
            nn.BatchNorm2d(32),  # Added Batch Normalization
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),  # Added Dropout
            BasicResBlock(32, 64),
            SEBlock(64, reduction=16),  # Squeeze-and-Excitation block
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),  # Added Dropout
            DepthwiseSeparableConv(64, 128, kernel_size=3),  # Increased number of filters
            nn.ReLU(),
            BasicResBlock(128, 256),
            SEBlock(256, reduction=16),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),  # Added Dropout
            SelfAttention(256),  # Added Self-Attention layer
        )
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)  # Global Average Pooling

    def forward(self, x):
        x = self.initial_layers(x)
        x = self.global_avg_pool(x)
        x = x.view(x.size(0), -1)  # Flatten the output for fully connected layers
        return x

class FasterKANvolver(nn.Module):
    def __init__(
        self,
        layers_hidden: List[int],
        grid_min: float = -1.2,
        grid_max: float = 0.2,
        num_grids: int = 8,
        exponent: int = 2,
        inv_denominator: float = 0.5,
        train_grid: bool = False,
        train_inv_denominator: bool = False,
        # use_base_update: bool = True,
        base_activation = None,
        spline_weight_init_scale: float = 1.0,
    ) -> None:
        super(FasterKANvolver, self).__init__()

        # Feature extractor with convolutional layers
        self.feature_extractor = EnhancedFeatureExtractor()

        # Flattened feature size after the convolutional layers
        flat_features = 256  # 256 channels after global average pooling

        # Update layers_hidden with the correct input size from the conv layers
        layers_hidden = [flat_features] + layers_hidden

        # Define the FasterKAN layers
        self.faster_kan_layers = nn.ModuleList([
            FasterKANLayer(
                in_dim, out_dim,
                grid_min=grid_min,
                grid_max=grid_max,
                num_grids=num_grids,
                exponent=exponent,
                inv_denominator=0.5,
                train_grid=False,
                train_inv_denominator=False,
                # use_base_update=use_base_update,
                base_activation=base_activation,
                spline_weight_init_scale=spline_weight_init_scale,
            ) for in_dim, out_dim in zip(layers_hidden[:-1], layers_hidden[1:])
        ])

    def forward(self, x):
        # Reshape flat input to an image batch, e.g. [batch_size, 3, 32, 32] for CIFAR
        x = x.view(-1, 3, 32, 32)
        # Apply the convolutional feature extractor
        x = self.feature_extractor(x)
        x = x.view(x.size(0), -1)  # Flatten the output from the conv layers

        # Pass through FasterKAN layers
        for layer in self.faster_kan_layers:
            x = layer(x)

        return x
```
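The shape flow through a single `FasterKANLayer` is `[B, D]` → LayerNorm → RSWAF basis `[B, D, G]` → flatten to `[B, D·G]` → bias-free `SplineLinear` → `[B, out]`. A small check (import path assumes the repo root is on `PYTHONPATH`):

```python
import torch
from tasks.utils.kan.fasterkan_layers import FasterKANLayer

layer = FasterKANLayer(input_dim=1125, output_dim=32, num_grids=8)
x = torch.randn(4, 1125)
print(layer.rbf(x).shape)  # torch.Size([4, 1125, 8]): one basis value per grid point
print(layer(x).shape)      # torch.Size([4, 32]) via a 9000 -> 32 linear map
```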
tasks/utils/kan/feature_extractor.py
ADDED
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from typing import *
from torch.autograd import Function


class BasicResBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicResBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.downsample = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = self.downsample(x)

        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity
        out = F.relu(out)

        return out

class SEBlock(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SEBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)


class DepthwiseSeparableConv(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(DepthwiseSeparableConv, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size,
                                   stride=stride, padding=padding, groups=in_channels)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        return x

class SelfAttention(nn.Module):
    def __init__(self, in_channels):
        super(SelfAttention, self).__init__()
        self.query_conv = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1)
        self.key_conv = nn.Conv2d(in_channels, in_channels // 8, kernel_size=1)
        self.value_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        batch_size, C, width, height = x.size()
        proj_query = self.query_conv(x).view(batch_size, -1, width * height).permute(0, 2, 1)
        proj_key = self.key_conv(x).view(batch_size, -1, width * height)
        energy = torch.bmm(proj_query, proj_key)
        attention = F.softmax(energy, dim=-1)
        proj_value = self.value_conv(x).view(batch_size, -1, width * height)
        out = torch.bmm(proj_value, attention.permute(0, 2, 1))
        out = out.view(batch_size, C, width, height)
        out = self.gamma * out + x
        return out

class EnhancedFeatureExtractor(nn.Module):
    def __init__(self,
                 colors = 3):
        super(EnhancedFeatureExtractor, self).__init__()
        self.initial_layers = nn.Sequential(
            nn.Conv2d(colors, 32, kernel_size=3, stride=1, padding=1),  # Increased number of filters
            nn.ReLU(),
            nn.BatchNorm2d(32),  # Added Batch Normalization
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),  # Added Dropout
            BasicResBlock(32, 64),
            SEBlock(64, reduction=16),  # Squeeze-and-Excitation block
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),  # Added Dropout
            DepthwiseSeparableConv(64, 128, kernel_size=3),  # Increased number of filters
            nn.ReLU(),
            BasicResBlock(128, 256),
            SEBlock(256, reduction=16),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),  # Added Dropout
            SelfAttention(256),  # Added Self-Attention layer
        )
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)  # Global Average Pooling

    def forward(self, x):
        x = self.initial_layers(x)
        x = self.global_avg_pool(x)
        x = x.view(x.size(0), -1)  # Flatten the output for fully connected layers
        return x
```
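Regardless of input resolution, `EnhancedFeatureExtractor` ends with `AdaptiveAvgPool2d(1)` over 256 channels, so its output is always a `[B, 256]` vector, which is why `FasterKANvolver` hard-codes `flat_features = 256`. For example:

```python
import torch
from tasks.utils.kan.feature_extractor import EnhancedFeatureExtractor

fe = EnhancedFeatureExtractor(colors=3)
x = torch.randn(2, 3, 32, 32)  # a small RGB batch
print(fe(x).shape)             # torch.Size([2, 256]) after global average pooling
```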
tasks/utils/models.py
CHANGED
```diff
@@ -2,6 +2,8 @@ import torch
 import torch.nn as nn
 from .Modules.conformer import ConformerEncoder, ConformerDecoder
 from .Modules.mhsa_pro import RotaryEmbedding, ContinuousRotaryEmbedding
+from .kan.fasterkan import FasterKAN
+from kan import KAN

 class ConvBlock(nn.Module):
     def __init__(self, args, num_layer) -> None:
@@ -111,4 +113,29 @@ class DualEncoder(nn.Module):
         x1 = self.encoder_x(x)
         x2, _ = self.encoder_f(x)
         logits = torch.cat([x1, x2], dim=-1)
-        return self.regressor(logits).squeeze()
+        return self.regressor(logits).squeeze()
+
+class CNNKan(nn.Module):
+    def __init__(self, args, conformer_args, kan_args):
+        super().__init__()
+        self.backbone = CNNEncoder(args)
+        # self.kan = KAN(width=kan_args['layers_hidden'])
+        self.kan = FasterKAN(**kan_args)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.backbone(x)
+        x = x.mean(dim=1)
+        return self.kan(x)
+
+class KanEncoder(nn.Module):
+    def __init__(self, args):
+        super().__init__()
+        self.kan_x = FasterKAN(**args)
+        self.kan_f = FasterKAN(**args)
+        self.kan_out = FasterKAN(layers_hidden=[args['layers_hidden'][-1]*2, 8,8,1])
+
+    def forward(self, x: torch.Tensor, f: torch.Tensor) -> torch.Tensor:
+        x = self.kan_x(x)
+        f = self.kan_f(f)
+        out = torch.cat([x, f], dim=-1)
+        return self.kan_out(out)
+
```
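`CNNKan` chains the existing `CNNEncoder` backbone with a `FasterKAN` head, averaging over dim 1 in between. `CNNEncoder` itself is not shown in this diff, so the feature shape below is an assumption; for the KAN head to accept the pooled features with the config above, the backbone would need to emit a last dimension of 1125:

```python
import torch
import torch.nn as nn

# Hypothetical stand-in for the CNNEncoder backbone (not visible in this diff):
# assume it maps a [B, 2, 18000] waveform+FFT batch to [B, C, 1125] features.
class FakeBackbone(nn.Module):
    def forward(self, x):
        return torch.randn(x.shape[0], 128, 1125)  # assumed output shape

x = torch.randn(4, 2, 18000)
feats = FakeBackbone()(x)   # [4, 128, 1125]
pooled = feats.mean(dim=1)  # [4, 1125], matching KAN layers_hidden[0] = 1125
```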
tasks/utils/train.py
CHANGED
```diff
@@ -74,8 +74,8 @@ class Trainer(object):
         lrs = []
         # self.optim_params['lr_history'] = []
         epochs_without_improvement = 0
-        main_proccess = (torch.distributed.is_initialized() and torch.distributed.get_rank() == 0) or self.device == 'cpu'
-
+        # main_proccess = (torch.distributed.is_initialized() and torch.distributed.get_rank() == 0) or self.device == 'cpu'
+        main_proccess = True  # change in a ddp setting
         print(f"Starting training for {num_epochs} epochs")
         print("is main process: ", main_proccess, flush=True)
         global_time = time.time()
@@ -221,7 +221,8 @@ class Trainer(object):
         x = x.to(device).float()
         fft = fft.to(device).float()
         y = y.to(device).float()
-
+        x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
+        y_pred = self.model(x_fft).squeeze()
         loss = self.criterion(y_pred, y)
         loss.backward()
         self.optimizer.step()
@@ -230,7 +231,7 @@ class Trainer(object):
         # get predicted classes
         probs = torch.sigmoid(y_pred)
         cls_pred = (probs > 0.5).float()
-        acc = (cls_pred == y).sum()
+        acc = (cls_pred == y).sum()
         return loss, acc, y

     def eval_epoch(self, device, epoch):
@@ -257,10 +258,11 @@ class Trainer(object):
         x, fft, y = batch['audio']['array'], batch['audio']['fft'], batch['label']
         x = x.to(device).float()
         fft = fft.to(device).float()
+        x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
         y = y.to(device).float()
         with torch.no_grad():
-            y_pred = self.model(
-            loss = self.criterion(y_pred, y)
+            y_pred = self.model(x_fft).squeeze()
+            loss = self.criterion(y_pred.squeeze(), y)
         probs = torch.sigmoid(y_pred)
         cls_pred = (probs > 0.5).float()
         acc = (cls_pred == y).sum()
@@ -280,15 +282,16 @@ class Trainer(object):
         x, fft, y = batch['audio']['array'], batch['audio']['fft'], batch['label']
         x = x.to(device).float()
         fft = fft.to(device).float()
+        x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
         y = y.to(device).float()
         with torch.no_grad():
-            y_pred = self.model(
+            y_pred = self.model(x_fft).squeeze()
             loss = self.criterion(y_pred, y)
             probs = torch.sigmoid(y_pred)
             cls_pred = (probs > 0.5).float()
             acc = (cls_pred == y).sum()
-            predictions.
-            true_labels.
+            predictions.extend(cls_pred.cpu().numpy())
+            true_labels.extend(y.cpu().numpy())
         all_accs += acc
         total += len(y)
         pbar.set_description("acc: {:.4f}".format(acc))
```
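The recurring `x_fft` construction stacks the raw waveform and its FFT (cast to float upstream, which keeps only the real part of the complex spectrum) as two channels, which is what `in_channels: 2` in the `CNNEncoder` config refers to:

```python
import torch

x = torch.randn(32, 18000)    # resampled waveform batch
fft = torch.randn(32, 18000)  # FFT coefficients, already cast to float
x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
print(x_fft.shape)            # torch.Size([32, 2, 18000]): a 2-channel input
```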