Coots committed on
Commit 1b75a9a · verified · 1 Parent(s): c9d68fb

Update app.py

Files changed (1)
app.py +35 -40
app.py CHANGED
@@ -1,56 +1,51 @@
-import os
 from fastapi import FastAPI, Request
-from fastapi.responses import JSONResponse
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from pathlib import Path
+import os
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 
-# Set up safe cache directory for Hugging Face
-cache_dir = os.getenv("TRANSFORMERS_CACHE", "/cache")  # Use environment variable or default to /cache
+# Set Hugging Face cache dir
+cache_dir = os.getenv("TRANSFORMERS_CACHE", "/cache")
 os.makedirs(cache_dir, exist_ok=True)
 
-# Optional: Use token only if you're accessing a private model
+# Token for private models
 hf_token = os.getenv("HF_TOKEN")
 
-# Load tokenizer and model
+# Load model
 model_id = "mistralai/Mistral-7B-Instruct-v0.2"
-
-try:
-    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token, cache_dir=cache_dir)
-    model = AutoModelForCausalLM.from_pretrained(
-        model_id,
-        token=hf_token,
-        cache_dir=cache_dir,
-        device_map="auto",  # or "cpu" if no GPU
-        torch_dtype="auto"  # will default to float32 on CPU
-    )
-except Exception as e:
-    raise RuntimeError(f"Failed to load model or tokenizer: {str(e)}")
-
-# Load pipeline
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=256,
-    temperature=0.7,
-    top_p=0.9,
-    repetition_penalty=1.1,
+tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token, cache_dir=cache_dir)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    token=hf_token,
+    cache_dir=cache_dir,
+    device_map="auto",
+    torch_dtype="auto"
 )
 
-# Init FastAPI app
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=256)
+
+# Create FastAPI app
 app = FastAPI()
 
+# Serve static files (JS, CSS, etc.)
+app.mount("/static", StaticFiles(directory="."), name="static")
+
+# Route: Serve index.html at root
+@app.get("/", response_class=HTMLResponse)
+async def root():
+    html_path = Path("index.html")
+    return HTMLResponse(content=html_path.read_text(), status_code=200)
+
+# Route: Chat API
 @app.post("/api")
 async def ask_ai(request: Request):
-    try:
-        data = await request.json()
-        question = data.get("question", "").strip()
-
-        if not question:
-            return JSONResponse(content={"answer": "❗ Please enter a valid question."})
+    data = await request.json()
+    question = data.get("question", "").strip()
 
-        prompt = f"[INST] {question} [/INST]"
-        output = pipe(prompt)[0]["generated_text"]
-        return JSONResponse(content={"answer": output.strip()})
+    if not question:
+        return JSONResponse(content={"answer": "❗ Please enter a valid question."})
 
-    except Exception as e:
-        return JSONResponse(content={"answer": f"⚠️ Error: {str(e)}"})
+    prompt = f"[INST] {question} [/INST]"
+    output = pipe(prompt)[0]["generated_text"]
+    return JSONResponse(content={"answer": output.strip()})
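
To smoke-test the updated /api route, a client call might look like the sketch below. The host and port are assumptions (7860 is merely the customary Hugging Face Spaces port; no launch command appears in this commit), while the /api path and the question/answer JSON keys come from the handler above.

import requests  # assumes the requests package is available

# Hypothetical client for the /api route added in this commit.
# localhost:7860 is an assumption; point it at wherever uvicorn
# actually serves the app.
resp = requests.post(
    "http://localhost:7860/api",
    json={"question": "What is FastAPI?"},
    timeout=300,  # generation with a 7B model can be slow, especially on CPU
)
resp.raise_for_status()
print(resp.json()["answer"])

One caveat: a transformers text-generation pipeline includes the prompt in generated_text by default, so the answer field will still carry the [INST] ... [/INST] wrapper. Calling pipe(prompt, return_full_text=False) is one way to return only the newly generated portion.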