Coots committed on
Commit 9040bc1 · verified · 1 Parent(s): 32f5b59

Update app.py

Files changed (1)
  1. app.py +56 -56
app.py CHANGED
@@ -1,56 +1,56 @@
- import os
- from fastapi import FastAPI, Request
- from fastapi.responses import JSONResponse
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
-
- # Set up safe cache directory for Hugging Face
- cache_dir = "/path/to/writable/directory/cache"  # Change this to a writable path
- os.makedirs(cache_dir, exist_ok=True)
- os.environ["TRANSFORMERS_CACHE"] = cache_dir
- os.environ["HF_TOKEN"] = cache_dir
-
- # Optional: Use token only if you're accessing a private model
- hf_token = os.getenv("HF_TOKEN")
-
- # Load tokenizer and model
- model_id = "mistralai/Mistral-7B-Instruct-v0.2"
-
- tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token, cache_dir=cache_dir)
- model = AutoModelForCausalLM.from_pretrained(
-     model_id,
-     token=hf_token,
-     cache_dir=cache_dir,
-     device_map="auto",   # or "cpu" if no GPU
-     torch_dtype="auto"   # will default to float32 on CPU
- )
-
- # Load pipeline
- pipe = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
-     max_new_tokens=256,
-     temperature=0.7,
-     top_p=0.9,
-     repetition_penalty=1.1,
- )
-
- # Init FastAPI app
- app = FastAPI()
-
- @app.post("/api")
- async def ask_ai(request: Request):
-     try:
-         data = await request.json()
-         question = data.get("question", "").strip()
-
-         if not question:
-             return JSONResponse(content={"answer": "❗ Please enter a valid question."})
-
-         prompt = f"[INST] {question.strip()} [/INST]"
-         output = pipe(prompt)[0]["generated_text"]
-         return JSONResponse(content={"answer": output.strip()})
-
-     except Exception as e:
-         return JSONResponse(content={"answer": f"⚠️ Error: {str(e)}"})
-
 
+ import os
+ from fastapi import FastAPI, Request
+ from fastapi.responses import JSONResponse
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+
+ # Set up safe cache directory for Hugging Face
+ cache_dir = os.getenv("TRANSFORMERS_CACHE", "/cache")  # Use environment variable or default to /cache
+ os.makedirs(cache_dir, exist_ok=True)
+
+ # Optional: Use token only if you're accessing a private model
+ hf_token = os.getenv("HF_TOKEN")
+
+ # Load tokenizer and model
+ model_id = "mistralai/Mistral-7B-Instruct-v0.2"
+
+ try:
+     tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token, cache_dir=cache_dir)
+     model = AutoModelForCausalLM.from_pretrained(
+         model_id,
+         token=hf_token,
+         cache_dir=cache_dir,
+         device_map="auto",   # or "cpu" if no GPU
+         torch_dtype="auto"   # will default to float32 on CPU
+     )
+ except Exception as e:
+     raise RuntimeError(f"Failed to load model or tokenizer: {str(e)}")
+
+ # Load pipeline
+ pipe = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     max_new_tokens=256,
+     temperature=0.7,
+     top_p=0.9,
+     repetition_penalty=1.1,
+ )
+
+ # Init FastAPI app
+ app = FastAPI()
+
+ @app.post("/api")
+ async def ask_ai(request: Request):
+     try:
+         data = await request.json()
+         question = data.get("question", "").strip()
+
+         if not question:
+             return JSONResponse(content={"answer": "❗ Please enter a valid question."})
+
+         prompt = f"[INST] {question} [/INST]"
+         output = pipe(prompt)[0]["generated_text"]
+         return JSONResponse(content={"answer": output.strip()})
+
+     except Exception as e:
+         return JSONResponse(content={"answer": f"⚠️ Error: {str(e)}"})
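
For context, the updated `/api` endpoint can be exercised with a simple client call once the app is served. This is a minimal sketch, assuming the server is started with `uvicorn app:app --host 0.0.0.0 --port 7860` (the launch command, host, and port are assumptions, not part of this commit):

```python
# Minimal sketch of calling the /api endpoint defined in app.py.
# Assumes the app is running locally on port 7860; adjust the URL to your deployment.
import requests

resp = requests.post(
    "http://localhost:7860/api",
    json={"question": "What is FastAPI?"},  # matches data.get("question") in ask_ai
    timeout=120,  # generation can be slow, especially on CPU
)
print(resp.json()["answer"])
```

The response body is always `{"answer": ...}`, so a client only needs to read that one field whether the call succeeded, the question was empty, or an error message was returned.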