Spaces:

Sirawitch
/

kkulchatbot

Runtime error

App Files Community

Sirawitch committed on Sep 19, 2024

Commit

b8408d1

verified ·

1 Parent(s): 2ef8549

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -4

app.py CHANGED Viewed

@@ -9,7 +9,26 @@ app = FastAPI()
 # โหลดโมเดลและ tokenizer
 model_name = "scb10x/llama-3-typhoon-v1.5-8b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
 class Query(BaseModel):
     queryResult: Optional[dict] = None
@@ -25,9 +44,10 @@ async def webhook(query: Query):
         # สร้าง prompt และ generate ข้อความ
         prompt = f"Human: {user_query}\nAI:"
-        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)
-        output = model.generate(input_ids, max_new_tokens=100, temperature=0.7)
         response = tokenizer.decode(output[0], skip_special_tokens=True)
         # แยกส่วนที่เป็นคำตอบของ AI
@@ -35,4 +55,8 @@ async def webhook(query: Query):
         return {"fulfillmentText": ai_response}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))

 # โหลดโมเดลและ tokenizer
 model_name = "scb10x/llama-3-typhoon-v1.5-8b-instruct"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+# ตรวจสอบว่ามี GPU หรือไม่
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# โหลดโมเดลด้วยการตั้งค่าที่เหมาะสม
+if device == "cuda":
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        low_cpu_mem_usage=True
+    )
+else:
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True
+    )
+model.to(device)
 class Query(BaseModel):
     queryResult: Optional[dict] = None
         # สร้าง prompt และ generate ข้อความ
         prompt = f"Human: {user_query}\nAI:"
+        input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
+        with torch.no_grad():
+            output = model.generate(input_ids, max_new_tokens=100, temperature=0.7)
         response = tokenizer.decode(output[0], skip_special_tokens=True)
         # แยกส่วนที่เป็นคำตอบของ AI
         return {"fulfillmentText": ai_response}
     except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)