Update main.py
main.py
CHANGED
@@ -7,14 +7,24 @@ import torch
 import os
 import asyncio
 
-# …
-
-os.…
+# Set all cache directories to a writable location
+cache_dir = "/tmp/hf_home"
+os.environ["HF_HOME"] = cache_dir
+os.environ["TRANSFORMERS_CACHE"] = cache_dir
+os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
+
+# Create cache directory with proper permissions
+os.makedirs(cache_dir, exist_ok=True)
+os.chmod(cache_dir, 0o777)  # Make writable by all
 
 # Load model and tokenizer
 model_name = "Qwen/Qwen2.5-0.5B-Instruct"
-…
-…
+try:
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, cache_dir=cache_dir)
+    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, cache_dir=cache_dir)
+except Exception as e:
+    print(f"Error loading model: {e}")
+    raise
 
 # Use CUDA if available
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -64,4 +74,4 @@ async def generate_response_chunks(prompt: str):
 # API route
 @app.post("/ask")
 async def ask(question: Question):
-    return StreamingResponse(generate_response_chunks(question.question), media_type="text/plain")
+    return StreamingResponse(generate_response_chunks(question.question), media_type="text/plain")
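
Note that environment variables like HF_HOME are read when transformers and huggingface_hub are first imported, so the os.environ assignments only redirect the cache for code that runs after them; the explicit cache_dir=cache_dir arguments to from_pretrained make the model load work regardless. A quick sanity check, as a sketch; the HF_HUB_CACHE constant is an assumption about a reasonably recent huggingface_hub:

import os

os.environ["HF_HOME"] = "/tmp/hf_home"  # must run before the import below

from huggingface_hub.constants import HF_HUB_CACHE

print(HF_HUB_CACHE)  # expected: a path under /tmp/hf_home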
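
The second hunk's header references generate_response_chunks, whose body is outside this diff. A minimal sketch of what such an async generator could look like, assuming the tokenizer, model, and device defined above; the chunk size and max_new_tokens are illustrative, not taken from the Space:

import asyncio

async def generate_response_chunks(prompt: str):
    # Illustrative sketch only -- the real body is not shown in this diff.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # Run the blocking generate() call in a worker thread so the event
    # loop stays responsive while the model produces tokens.
    output_ids = await asyncio.to_thread(
        model.generate, **inputs, max_new_tokens=256
    )
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Yield the reply in small pieces so StreamingResponse can flush
    # incremental output to the client.
    for i in range(0, len(text), 32):
        yield text[i : i + 32]
        await asyncio.sleep(0)  # give other tasks a chance to run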
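
For reference, the /ask route can be exercised with a small streaming client. The URL and port are assumptions (Spaces serve on 7860 locally by default); the "question" payload field matches the Question model used in main.py:

import asyncio

import httpx

async def main() -> None:
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "POST",
            "http://localhost:7860/ask",
            json={"question": "What is CUDA?"},
        ) as response:
            # Print chunks as they arrive from the StreamingResponse.
            async for chunk in response.aiter_text():
                print(chunk, end="", flush=True)

asyncio.run(main())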