Commit 1d3fed2 · 1 parent: 3e7ee7c
Refactor attention module to improve xformers integration. Renamed availability flag to HAS_XFORMERS and added safe_memory_efficient_attention function for better handling of attention operations across devices. Updated related assertions and calls to ensure compatibility with systems lacking GPU support.
imagedream/ldm/modules/attention.py (changed)

@@ -12,10 +12,9 @@ from .diffusionmodules.util import checkpoint
 try:
     import xformers
     import xformers.ops
-
-    XFORMERS_IS_AVAILBLE = True
-except:
-    XFORMERS_IS_AVAILBLE = False
+    HAS_XFORMERS = True
+except ImportError:
+    HAS_XFORMERS = False
 
 # CrossAttn precision handling
 import os

@@ -138,6 +137,20 @@ class SpatialSelfAttention(nn.Module):
         return x + h_
 
 
+def safe_memory_efficient_attention(q, k, v, attn_bias=None, op=None, p=0.0):
+    if q.device.type == "cuda" and HAS_XFORMERS:
+        return xformers.ops.memory_efficient_attention(q, k, v, attn_bias=attn_bias, op=op, p=p)
+    else:
+        # Standard attention for CPU
+        scale = 1.0 / (q.shape[-1] ** 0.5)
+        attn = torch.matmul(q * scale, k.transpose(-2, -1))
+        if attn_bias is not None:
+            attn = attn + attn_bias
+        attn = torch.softmax(attn, dim=-1)
+        attn = torch.nn.functional.dropout(attn, p=p)
+        return torch.matmul(attn, v)
+
+
 class MemoryEfficientCrossAttention(nn.Module):
     # https://github.com/MatthieuTPHR/diffusers/blob/d80b531ff8060ec1ea982b65a1b8df70f73aa67c/src/diffusers/models/attention.py#L223
     def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0, **kwargs):

@@ -195,7 +208,7 @@ class MemoryEfficientCrossAttention(nn.Module):
         )
 
         # actually compute the attention, what we cannot get enough of
-        out = xformers.ops.memory_efficient_attention(
+        out = safe_memory_efficient_attention(
             q, k, v, attn_bias=None, op=self.attention_op
         )
 

@@ -209,7 +222,7 @@ class MemoryEfficientCrossAttention(nn.Module):
             (k_ip, v_ip),
         )
         # actually compute the attention, what we cannot get enough of
-        out_ip = xformers.ops.memory_efficient_attention(
+        out_ip = safe_memory_efficient_attention(
             q, k_ip, v_ip, attn_bias=None, op=self.attention_op
         )
         out = out + self.ip_weight * out_ip

@@ -239,7 +252,7 @@ class BasicTransformerBlock(nn.Module):
         **kwargs
     ):
         super().__init__()
-        assert XFORMERS_IS_AVAILBLE
+        assert HAS_XFORMERS, "xformers is not available"
         attn_cls = MemoryEfficientCrossAttention
         self.disable_self_attn = disable_self_attn
         self.attn1 = attn_cls(
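Because the fallback branch of safe_memory_efficient_attention is ordinary scaled-dot-product attention, its CPU output can be sanity-checked against PyTorch's reference implementation. A minimal sketch, assuming the imagedream package and its dependencies are importable, PyTorch 2.x is installed, and tensors use the flattened (batch * heads, seq_len, head_dim) layout that MemoryEfficientCrossAttention passes in:

    import torch
    import torch.nn.functional as F
    from imagedream.ldm.modules.attention import safe_memory_efficient_attention

    # CPU tensors in the flattened (batch * heads, seq_len, head_dim) layout
    # produced by the module's reshape step (illustrative sizes).
    q = torch.randn(2 * 8, 77, 64)
    k = torch.randn(2 * 8, 77, 64)
    v = torch.randn(2 * 8, 77, 64)

    # On a CPU device (or when xformers is unavailable) the helper takes the
    # manual matmul/softmax branch instead of xformers.ops.memory_efficient_attention.
    out = safe_memory_efficient_attention(q, k, v, attn_bias=None, p=0.0)

    # The fallback uses the same 1/sqrt(head_dim) scaling, so it should agree
    # with PyTorch's reference attention up to numerical tolerance.
    ref = F.scaled_dot_product_attention(q, k, v)
    print(out.shape, torch.allclose(out, ref, atol=1e-5))  # expect: torch.Size([16, 77, 64]) True
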
imagedream/ldm/modules/diffusionmodules/model.py (changed)

@@ -11,10 +11,9 @@ from ..attention import MemoryEfficientCrossAttention
 try:
     import xformers
     import xformers.ops
-
-    XFORMERS_IS_AVAILBLE = True
-except:
-    XFORMERS_IS_AVAILBLE = False
+    HAS_XFORMERS = True
+except ImportError:
+    HAS_XFORMERS = False
     print("No module 'xformers'. Proceeding without it.")
 
 

@@ -238,7 +237,7 @@ class MemoryEfficientAttnBlock(nn.Module):
             .contiguous(),
             (q, k, v),
         )
-        out = xformers.ops.memory_efficient_attention(
+        out = safe_memory_efficient_attention(
             q, k, v, attn_bias=None, op=self.attention_op
         )
 

@@ -262,6 +261,20 @@ class MemoryEfficientCrossAttentionWrapper(MemoryEfficientCrossAttention):
         return x + out
 
 
+def safe_memory_efficient_attention(q, k, v, attn_bias=None, op=None, p=0.0):
+    if q.device.type == "cuda" and HAS_XFORMERS:
+        return xformers.ops.memory_efficient_attention(q, k, v, attn_bias=attn_bias, op=op, p=p)
+    else:
+        # Standard attention for CPU
+        scale = 1.0 / (q.shape[-1] ** 0.5)
+        attn = torch.matmul(q * scale, k.transpose(-2, -1))
+        if attn_bias is not None:
+            attn = attn + attn_bias
+        attn = torch.softmax(attn, dim=-1)
+        attn = torch.nn.functional.dropout(attn, p=p)
+        return torch.matmul(attn, v)
+
+
 def make_attn(in_channels, attn_type="vanilla", attn_kwargs=None):
     assert attn_type in [
         "vanilla",

@@ -270,7 +283,7 @@ def make_attn(in_channels, attn_type="vanilla", attn_kwargs=None):
         "linear",
         "none",
     ], f"attn_type {attn_type} unknown"
-    if XFORMERS_IS_AVAILBLE and attn_type == "vanilla":
+    if HAS_XFORMERS and attn_type == "vanilla":
         attn_type = "vanilla-xformers"
     print(f"making attention of type '{attn_type}' with {in_channels} in_channels")
     if attn_type == "vanilla":
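With the flag renamed, make_attn upgrades "vanilla" to "vanilla-xformers" only when the xformers import actually succeeded. A quick way to see which attention block gets built on a given machine is sketched below; it assumes this model.py keeps the standard ldm behavior of returning a module (AttnBlock or MemoryEfficientAttnBlock) that maps an NCHW feature map to the same shape, and the printed values are only an example for a machine without xformers:

    import torch
    from imagedream.ldm.modules.diffusionmodules.model import make_attn, HAS_XFORMERS

    # Without xformers installed, HAS_XFORMERS is False and "vanilla" is kept;
    # with it installed, the same call selects the memory-efficient variant,
    # which now falls back gracefully on CPU via safe_memory_efficient_attention.
    attn = make_attn(in_channels=64, attn_type="vanilla")
    x = torch.randn(1, 64, 16, 16)  # NCHW feature map
    print(HAS_XFORMERS, type(attn).__name__, attn(x).shape)
    # e.g. False AttnBlock torch.Size([1, 64, 16, 16])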