Coots committed on
Commit
f0601cc
·
verified ·
1 Parent(s): e7328db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -13
app.py CHANGED
@@ -1,22 +1,24 @@
1
  from fastapi import FastAPI, Request
2
  from fastapi.responses import JSONResponse, FileResponse
3
  from transformers import pipeline
4
- from huggingface_hub import login
5
  import os
6
 
7
- # 🛠️ Fix write permission issue
8
- os.environ["HF_HOME"] = "/data" # ✅ Hugging Face model cache path
9
 
10
- # 🔐 Login with token from Hugging Face Secrets (don't hardcode!)
11
  hf_token = os.getenv("HF_TOKEN")
12
- if hf_token:
13
- login(token=hf_token)
14
-
15
- # ✅ Load the model pipeline (CPU-friendly)
16
  model_id = os.getenv("MODEL_ID", "mistralai/Mistral-7B-Instruct-v0.2")
17
- pipe = pipeline("text-generation", model=model_id, trust_remote_code=True)
18
 
19
- # 🚀 FastAPI app
 
 
 
 
 
 
 
 
20
  app = FastAPI()
21
 
22
  @app.get("/")
@@ -28,13 +30,13 @@ async def serve_script():
28
  return FileResponse("script.js")
29
 
30
  @app.post("/api")
31
- async def chat_endpoint(request: Request):
32
  data = await request.json()
33
  question = data.get("question", "")
34
  prompt = f"[INST] {question.strip()} [/INST]"
35
  try:
36
- result = pipe(prompt, max_new_tokens=256, temperature=0.7)
37
- response = result[0]["generated_text"].split("[/INST]")[-1].strip()
38
  return JSONResponse({"answer": response})
39
  except Exception as e:
40
  return JSONResponse({"answer": f"Error: {str(e)}"})
 
1
  from fastapi import FastAPI, Request
2
  from fastapi.responses import JSONResponse, FileResponse
3
  from transformers import pipeline
 
4
  import os
5
 
6
+ # ✅ Use /data for HF cache — only writable directory in Spaces
7
+ os.environ["HF_HOME"] = "/data"
8
 
9
+ # ✅ Get token and model from env
10
  hf_token = os.getenv("HF_TOKEN")
 
 
 
 
11
  model_id = os.getenv("MODEL_ID", "mistralai/Mistral-7B-Instruct-v0.2")
 
12
 
13
+ # ✅ Load pipeline without login(), using token directly
14
+ pipe = pipeline(
15
+ "text-generation",
16
+ model=model_id,
17
+ token=hf_token,
18
+ trust_remote_code=True
19
+ )
20
+
21
+ # ✅ Set up FastAPI app
22
  app = FastAPI()
23
 
24
  @app.get("/")
 
30
  return FileResponse("script.js")
31
 
32
  @app.post("/api")
33
+ async def ask_ai(request: Request):
34
  data = await request.json()
35
  question = data.get("question", "")
36
  prompt = f"[INST] {question.strip()} [/INST]"
37
  try:
38
+ output = pipe(prompt, max_new_tokens=256, temperature=0.7)
39
+ response = output[0]["generated_text"].split("[/INST]")[-1].strip()
40
  return JSONResponse({"answer": response})
41
  except Exception as e:
42
  return JSONResponse({"answer": f"Error: {str(e)}"})