Update app.py
app.py CHANGED
@@ -1,5 +1,3 @@
-# app.py
-
 import os
 import sys
 import subprocess
@@ -9,7 +7,7 @@ from typing import List, Dict, Any, Optional
 # Warning: this can fail in many hosted environments due to insufficient permissions. Using requirements.txt is recommended.
 
 def install_required_modules():
-    """Install all required Python modules at runtime with pip."""
+    """Install all required Python modules at runtime with pip, forcing an AVX-512 build."""
     required_packages = [
         "fastapi",
         "uvicorn",
@@ -18,7 +16,21 @@ def install_required_modules():
         "llama-cpp-python"
     ]
 
-    print("--- Attempting to install/upgrade required Python modules at runtime ---")
+    # ----------------------------------------------------
+    # **Core change: configure the Llama.cpp build options**
+    # ----------------------------------------------------
+    compile_env = os.environ.copy()
+
+    # 1. Force the CMake build path
+    compile_env["FORCE_CMAKE"] = "1"
+
+    # 2. Set CMake flags enabling AVX512 and AVX512_VNNI
+    # Note: if your CPU does not support AVX512, this leads to a runtime crash (Illegal instruction).
+    # Better to read the flags from an environment variable, e.g. os.environ.get("LLAMA_COMPILER_FLAGS", "-DLLAMA_AVX512=ON -DLLAMA_AVX512_VNNI=ON")
+    compile_env["CMAKE_ARGS"] = "-DLLAMA_AVX512=ON -DLLAMA_AVX512_VNNI=ON"
+    # ----------------------------------------------------
+
+    print("--- Attempting to install/upgrade required Python modules at runtime (AVX-512 build enabled) ---")
 
     try:
         subprocess.check_call([
@@ -27,11 +39,17 @@ def install_required_modules():
             "pip",
             "install",
             *required_packages,
-            "--upgrade"
-        ])
-        print("All modules installed/updated successfully.")
+            "--upgrade",
+            "--no-cache-dir",    # ensure a fresh rebuild instead of a cached wheel
+            "--force-reinstall"  # ensure a rebuild even if the package is already present
+        ],
+        # pass the prepared environment variables to the subprocess
+        env=compile_env)
+
+        print("All modules installed/updated successfully; llama-cpp-python was compiled with AVX-512.")
     except subprocess.CalledProcessError as e:
         print(f"**FATAL ERROR**: module installation failed. Error message: {e}")
+        print("Check whether your CPU supports AVX-512, or try removing the CMAKE_ARGS environment variable.")
         sys.exit(1)
     except Exception as e:
         print(f"**FATAL ERROR**: an unknown error occurred. Error message: {e}")
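
Note: the comments in the hunks above warn that forcing -DLLAMA_AVX512=ON on a CPU without AVX-512 makes the compiled library crash with "Illegal instruction". A minimal sketch of guarding the flags behind a runtime CPU check, assuming a Linux host where /proc/cpuinfo lists "avx512f" and "avx512_vnni" among the supported flags; cpu_supports() is a hypothetical helper, not part of this commit:

# Sketch (assumes Linux): enable the AVX-512 build flags only when the
# host CPU actually reports them. cpu_supports() is hypothetical.
import os

def cpu_supports(*wanted: str) -> bool:
    """Return True if every requested flag appears in /proc/cpuinfo's flags lines."""
    try:
        with open("/proc/cpuinfo") as f:
            flags = set()
            for line in f:
                if line.startswith("flags"):
                    flags.update(line.split(":", 1)[1].split())
        return all(w in flags for w in wanted)
    except OSError:
        return False  # non-Linux host or restricted container: assume unsupported

compile_env = os.environ.copy()
compile_env["FORCE_CMAKE"] = "1"
if cpu_supports("avx512f", "avx512_vnni"):
    compile_env["CMAKE_ARGS"] = "-DLLAMA_AVX512=ON -DLLAMA_AVX512_VNNI=ON"
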
@@ -54,7 +72,7 @@ try:
     from huggingface_hub import hf_hub_download
 
     # Import the Llama.cpp module
-    from llama_cpp import Llama
+    from llama_cpp import Llama, llama_print_system_info  # added for the system-info check
 except ImportError as e:
     print(f"**FATAL ERROR**: module import failed. Error: {e}")
     sys.exit(1)
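
Because this import sits in a try/except that exits the process, a missing llama_print_system_info symbol would take the whole app down even though only the diagnostic needs it. A defensive variant, sketched under the assumption that some llama-cpp-python builds may not re-export the low-level symbol at package level:

# Sketch: keep startup alive if llama_print_system_info is unavailable
# (assumption: not every build re-exports the low-level symbol).
try:
    from llama_cpp import Llama, llama_print_system_info
except ImportError:
    from llama_cpp import Llama

    def llama_print_system_info() -> bytes:
        return b"(system info unavailable in this build)"  # fallback stub
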
@@ -73,6 +91,12 @@ def initialize_llm():
     if LLAMA_INSTANCE is not None:
         return
 
+    # Check whether AVX-512 is enabled
+    print("--- Llama.cpp System Info ---")
+    print(llama_print_system_info())
+    print("-----------------------------")
+
+
     print(f"--- 1. Starting model download: {MODEL_NAME} ---")
     try:
         model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_NAME)
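
llama_print_system_info() is a low-level binding around llama.cpp's system-info string and returns it as bytes in the Python binding, so the raw print above shows something like b'AVX = 1 | AVX2 = 1 | AVX512 = 1 | ...'. A short sketch that decodes the string and checks the AVX512 field; the exact field names come from llama.cpp's output and may vary between versions:

# Sketch: decode the system-info bytes and verify the AVX512 field.
# Assumption: field names follow llama.cpp's "NAME = 0|1 | ..." format.
from llama_cpp import llama_print_system_info

info = llama_print_system_info().decode("utf-8", errors="replace")
print(info)
if "AVX512 = 1" not in info:
    print("WARNING: this llama-cpp-python build does not report AVX512 = 1.")
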