Update app.py
Browse files
app.py
CHANGED
|
@@ -154,18 +154,21 @@ def install_flash_attn():
|
|
| 154 |
|
| 155 |
logging.info(f"Detected CUDA version: {cuda_version}")
|
| 156 |
|
| 157 |
-
# CUDA
|
| 158 |
-
if cuda_version.startswith("
|
|
|
|
|
|
|
| 159 |
flash_attn_url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu11torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"
|
| 160 |
-
subprocess.run(
|
| 161 |
-
["pip", "install", flash_attn_url],
|
| 162 |
-
check=True,
|
| 163 |
-
capture_output=True
|
| 164 |
-
)
|
| 165 |
else:
|
| 166 |
logging.warning(f"Unsupported CUDA version: {cuda_version}, skipping flash-attn installation")
|
| 167 |
return False
|
| 168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
logging.info("flash-attn installed successfully!")
|
| 170 |
return True
|
| 171 |
except Exception as e:
|
|
@@ -437,11 +440,12 @@ Stay with me forever, let our love just flow
|
|
| 437 |
)
|
| 438 |
|
| 439 |
# Run with server settings
|
| 440 |
-
|
|
|
|
| 441 |
server_name="0.0.0.0",
|
| 442 |
server_port=7860,
|
| 443 |
share=True,
|
| 444 |
-
enable_queue=True,
|
| 445 |
show_api=True,
|
| 446 |
-
show_error=True
|
| 447 |
-
)
|
|
|
|
|
|
| 154 |
|
| 155 |
logging.info(f"Detected CUDA version: {cuda_version}")
|
| 156 |
|
| 157 |
+
# Select the flash-attn wheel matching the detected CUDA version
|
| 158 |
+
if cuda_version.startswith("12.1"):
|
| 159 |
+
flash_attn_url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.5/flash_attn-2.7.5+cu121torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"
|
| 160 |
+
elif cuda_version.startswith("11.8"):
|
| 161 |
flash_attn_url = "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu11torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
else:
|
| 163 |
logging.warning(f"Unsupported CUDA version: {cuda_version}, skipping flash-attn installation")
|
| 164 |
return False
|
| 165 |
|
| 166 |
+
subprocess.run(
|
| 167 |
+
["pip", "install", flash_attn_url],
|
| 168 |
+
check=True,
|
| 169 |
+
capture_output=True
|
| 170 |
+
)
|
| 171 |
+
|
| 172 |
logging.info("flash-attn installed successfully!")
|
| 173 |
return True
|
| 174 |
except Exception as e:
|
|
|
|
| 440 |
)
|
| 441 |
|
| 442 |
# Run with server settings
|
| 443 |
+
# Run with server settings
|
| 444 |
+
demo.queue(max_size=20).launch(
|
| 445 |
server_name="0.0.0.0",
|
| 446 |
server_port=7860,
|
| 447 |
share=True,
|
|
|
|
| 448 |
show_api=True,
|
| 449 |
+
show_error=True,
|
| 450 |
+
concurrency_count=2 # Moved here as a launch() parameter instead of queue() — NOTE(review): in Gradio 3.x concurrency_count belongs to queue(), and it was removed entirely in Gradio 4; confirm the installed Gradio version accepts this keyword on launch()
|
| 451 |
+
)
|