Spaces:

AlphaPhoenix
/

MATRIX

Sleeping

App Files Files Community

laserbeam2045 committed on May 4

Commit

9d3ba14

1 Parent(s): 215bcb0

fix

Browse files

Files changed (2) hide show

app.py +40 -15
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import os
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from huggingface_hub import hf_hub_download
-from pyllamacpp.model import Model
 # -----------------------------------------------------------------------------
 # Hugging Face Hub の設定
@@ -32,11 +32,24 @@ if not os.path.exists(MODEL_PATH):
     )
 # -----------------------------------------------------------------------------
-# llama.cpp (pyllamacpp) で 4bit GGUF モデルをロード
 # -----------------------------------------------------------------------------
-llm = Model(
-    model_path=MODEL_PATH,
-)
 # -----------------------------------------------------------------------------
 # FastAPI 定義
@@ -48,21 +61,32 @@ class GenerationRequest(BaseModel):
     max_new_tokens: int = 128
     temperature: float = 0.8
     top_p: float = 0.95
 @app.post("/generate")
 async def generate(req: GenerationRequest):
     if not req.prompt:
         raise HTTPException(status_code=400, detail="`prompt` は必須です。")
-    # llama.cpp の generate を呼び出し
-    text = llm.generate(
-        req.prompt,
-        top_p=req.top_p,
-        temp=req.temperature,
-        n_predict=req.max_new_tokens,
-        repeat_last_n=64,
-        repeat_penalty=1.1
-    )
-    return {"generated_text": text}
 # -----------------------------------------------------------------------------
 # ローカル起動用
@@ -70,4 +94,5 @@ async def generate(req: GenerationRequest):
 if __name__ == "__main__":
     import uvicorn
     port = int(os.environ.get("PORT", 8000))
     uvicorn.run("app:app", host="0.0.0.0", port=port, log_level="info")

 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from huggingface_hub import hf_hub_download
+from llama_cpp import Llama # llama-cpp-python をインポート
 # -----------------------------------------------------------------------------
 # Hugging Face Hub の設定
     )
 # -----------------------------------------------------------------------------
+# llama-cpp-python で 4bit GGUF モデルをロード
 # -----------------------------------------------------------------------------
+print(f"Loading model from {MODEL_PATH}...")
+try:
+    llm = Llama(
+        model_path=MODEL_PATH,
+        n_ctx=2048,      # コンテキストサイズ (モデルに合わせて調整してください)
+        # n_gpu_layers=-1, # GPU を使う場合 (Hugging Face Spaces 無料枠では通常 0)
+        n_gpu_layers=0,   # CPU のみ使用
+        verbose=True     # 詳細ログを出力
+    )
+    print("Model loaded successfully.")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    # エラーが発生した場合、アプリケーションを終了させるか、エラーハンドリングを行う
+    # ここでは簡単なエラーメッセージを出力して終了する例
+    raise RuntimeError(f"Failed to load the LLM model: {e}")
 # -----------------------------------------------------------------------------
 # FastAPI 定義
     max_new_tokens: int = 128
     temperature: float = 0.8
     top_p: float = 0.95
+    # llama-cpp-python で利用可能な他のパラメータも追加可能
+    # stop: list[str] | None = None
+    # repeat_penalty: float = 1.1
 @app.post("/generate")
 async def generate(req: GenerationRequest):
     if not req.prompt:
         raise HTTPException(status_code=400, detail="`prompt` は必須です。")
+    try:
+        # llama-cpp-python の __call__ メソッドで生成
+        output = llm(
+            req.prompt,
+            max_tokens=req.max_new_tokens,
+            temperature=req.temperature,
+            top_p=req.top_p,
+            # stop=req.stop, # 必要なら追加
+            # repeat_penalty=req.repeat_penalty, # 必要なら追加
+        )
+        # 生成されたテキストを取得
+        generated_text = output["choices"][0]["text"]
+        return {"generated_text": generated_text}
+    except Exception as e:
+        print(f"Error during generation: {e}")
+        raise HTTPException(status_code=500, detail=f"生成中にエラーが発生しました: {e}")
 # -----------------------------------------------------------------------------
 # ローカル起動用
 if __name__ == "__main__":
     import uvicorn
     port = int(os.environ.get("PORT", 8000))
+    # アプリケーションのロードに失敗した場合に備えて try-except を追加することも検討
     uvicorn.run("app:app", host="0.0.0.0", port=port, log_level="info")

requirements.txt CHANGED Viewed

@@ -1,3 +1,3 @@
 fastapi
 uvicorn[standard]
-pyllamacpp

 fastapi
 uvicorn[standard]
+llama-cpp-python