abdullahalioo committed
Commit 6f6ae2a · verified · 1 Parent(s): 463f46a

Update main.py

Files changed (1)
  1. main.py +16 -6
main.py CHANGED
@@ -7,14 +7,24 @@ import torch
  import os
  import asyncio
 
- # ✅ Use writable temp dir for Hugging Face cache
- os.environ["HF_HOME"] = "/tmp/hf_home"
- os.makedirs(os.environ["HF_HOME"], exist_ok=True)
+ # ✅ Set all cache directories to a writable location
+ cache_dir = "/tmp/hf_home"
+ os.environ["HF_HOME"] = cache_dir
+ os.environ["TRANSFORMERS_CACHE"] = cache_dir
+ os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
+
+ # ✅ Create cache directory with proper permissions
+ os.makedirs(cache_dir, exist_ok=True)
+ os.chmod(cache_dir, 0o777)  # Make writable by all
 
  # ✅ Load model and tokenizer
  model_name = "Qwen/Qwen2.5-0.5B-Instruct"
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
- model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
+ try:
+     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, cache_dir=cache_dir)
+     model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, cache_dir=cache_dir)
+ except Exception as e:
+     print(f"Error loading model: {e}")
+     raise
 
  # ✅ Use CUDA if available
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -64,4 +74,4 @@ async def generate_response_chunks(prompt: str):
  # ✅ API route
  @app.post("/ask")
  async def ask(question: Question):
-     return StreamingResponse(generate_response_chunks(question.question), media_type="text/plain")
+     return StreamingResponse(generate_response_chunks(question.question), media_type="text/plain")
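
For reference, a minimal client sketch for exercising the patched /ask route. It assumes details the diff does not show: the app is served locally (for example with uvicorn main:app on port 8000), the Question model exposes a single question field (implied by question.question in the handler), and the httpx package is available. Treat it as an illustration, not part of the commit.

import httpx

def ask(prompt: str, base_url: str = "http://localhost:8000") -> None:
    # /ask returns a text/plain StreamingResponse, so consume it incrementally.
    payload = {"question": prompt}  # field name inferred from `question.question`
    with httpx.stream("POST", f"{base_url}/ask", json=payload, timeout=None) as response:
        response.raise_for_status()
        for chunk in response.iter_text():
            print(chunk, end="", flush=True)

if __name__ == "__main__":
    ask("What is the capital of France?")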