Coots committed on
Commit 9040bc1 · verified · 1 Parent(s): 32f5b59

Update app.py

Files changed (1)
  1. app.py +56 -56
app.py CHANGED
@@ -1,56 +1,56 @@
- import os
- from fastapi import FastAPI, Request
- from fastapi.responses import JSONResponse
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-
- # Set up safe cache directory for Hugging Face
- cache_dir = "/path/to/writable/directory/cache"  # Change this to a writable path
- os.makedirs(cache_dir, exist_ok=True)
- os.environ["TRANSFORMERS_CACHE"] = cache_dir
- os.environ["HF_TOKEN"] = cache_dir
-
- # Optional: Use token only if you're accessing a private model
- hf_token = os.getenv("HF_TOKEN")
-
- # Load tokenizer and model
- model_id = "mistralai/Mistral-7B-Instruct-v0.2"
-
- tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token, cache_dir=cache_dir)
- model = AutoModelForCausalLM.from_pretrained(
-     model_id,
-     token=hf_token,
-     cache_dir=cache_dir,
-     device_map="auto",   # or "cpu" if no GPU
-     torch_dtype="auto"   # will default to float32 on CPU
- )
-
- # Load pipeline
- pipe = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
-     max_new_tokens=256,
-     temperature=0.7,
-     top_p=0.9,
-     repetition_penalty=1.1,
- )
-
- # Init FastAPI app
- app = FastAPI()
-
- @app.post("/api")
- async def ask_ai(request: Request):
-     try:
-         data = await request.json()
-         question = data.get("question", "").strip()
-
-         if not question:
-             return JSONResponse(content={"answer": "❗ Please enter a valid question."})
-
-         prompt = f"[INST] {question.strip()} [/INST]"
-         output = pipe(prompt)[0]["generated_text"]
-         return JSONResponse(content={"answer": output.strip()})
-
-     except Exception as e:
-         return JSONResponse(content={"answer": f"⚠️ Error: {str(e)}"})
-
 
+ import os
+ from fastapi import FastAPI, Request
+ from fastapi.responses import JSONResponse
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+
+ # Set up safe cache directory for Hugging Face
+ cache_dir = os.getenv("TRANSFORMERS_CACHE", "/cache")  # Use environment variable or default to /cache
+ os.makedirs(cache_dir, exist_ok=True)
+
+ # Optional: Use token only if you're accessing a private model
+ hf_token = os.getenv("HF_TOKEN")
+
+ # Load tokenizer and model
+ model_id = "mistralai/Mistral-7B-Instruct-v0.2"
+
+ try:
+     tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token, cache_dir=cache_dir)
+     model = AutoModelForCausalLM.from_pretrained(
+         model_id,
+         token=hf_token,
+         cache_dir=cache_dir,
+         device_map="auto",   # or "cpu" if no GPU
+         torch_dtype="auto"   # will default to float32 on CPU
+     )
+ except Exception as e:
+     raise RuntimeError(f"Failed to load model or tokenizer: {str(e)}")
+
+ # Load pipeline
+ pipe = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     max_new_tokens=256,
+     temperature=0.7,
+     top_p=0.9,
+     repetition_penalty=1.1,
+ )
+
+ # Init FastAPI app
+ app = FastAPI()
+
+ @app.post("/api")
+ async def ask_ai(request: Request):
+     try:
+         data = await request.json()
+         question = data.get("question", "").strip()
+
+         if not question:
+             return JSONResponse(content={"answer": "❗ Please enter a valid question."})
+
+         prompt = f"[INST] {question} [/INST]"
+         output = pipe(prompt)[0]["generated_text"]
+         return JSONResponse(content={"answer": output.strip()})
+
+     except Exception as e:
+         return JSONResponse(content={"answer": f"⚠️ Error: {str(e)}"})
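
For context, the updated `/api` endpoint can be exercised with a simple client call once the app is served. This is a minimal sketch, assuming the server is started with `uvicorn app:app --host 0.0.0.0 --port 7860` (the launch command, host, and port are assumptions, not part of this commit):

```python
# Minimal sketch of calling the /api endpoint defined in app.py.
# Assumes the app is running locally on port 7860; adjust the URL to your deployment.
import requests

resp = requests.post(
    "http://localhost:7860/api",
    json={"question": "What is FastAPI?"},  # matches data.get("question") in ask_ai
    timeout=120,  # generation can be slow, especially on CPU
)
print(resp.json()["answer"])
```

The response body is always `{"answer": ...}`, so a client only needs to read that one field whether the call succeeded, the question was empty, or an error message was returned.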