Spaces: Runtime error
Update main.py
main.py CHANGED
@@ -1,5 +1,5 @@
 from fastapi import FastAPI
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from datasets import load_dataset
 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
@@ -43,7 +43,7 @@ login(hf_token)
 # Load Dataset and Prepare Knowledge Base
 ds = load_dataset("codeparrot/apps", "all", split="train")
 os.makedirs("knowledge_base", exist_ok=True)
-for i, example in enumerate(ds.select(range(
+for i, example in enumerate(ds.select(range(50))): # Reduced to 50 for memory
     solution = example['solutions'][0] if example['solutions'] else "No solution available"
     with open(f"knowledge_base/doc_{i}.txt", "w", encoding="utf-8") as f:
         f.write(f"### Problem\n{example['question']}\n\n### Solution\n{solution}")
@@ -57,20 +57,14 @@ faiss_index = faiss.IndexFlatL2(d)
 vector_store = FaissVectorStore(faiss_index=faiss_index)
 index = VectorStoreIndex.from_documents(documents, vector_store=vector_store)
 
-# Load LLaMA Model
+# Load LLaMA Model (without quantization, on CPU)
 model_name = "meta-llama/Llama-3.2-1B-Instruct"
-quant_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True
-)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-device = "
+device = "cpu" # Force CPU usage
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-
-
+    device_map="cpu", # Explicitly map to CPU
+    torch_dtype=torch.float32 # Use float32 for CPU compatibility
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
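
For reference, a consolidated sketch of the CPU loading path this commit switches to, assuming the gated meta-llama checkpoint is already accessible via the login(hf_token) call shown in the hunk context. The smoke-test prompt and max_new_tokens value are illustrative additions, not part of main.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B-Instruct"

# Load tokenizer and model entirely on the CPU, in full precision
# (no bitsandbytes / CUDA dependency).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",           # keep all weights on the CPU
    torch_dtype=torch.float32,  # float32 avoids half-precision ops that are limited on CPU
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Quick smoke test: a short greedy generation to confirm the model runs on CPU.
inputs = tokenizer("Write a Python function that reverses a string.", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))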
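The diff does not show how the FAISS-backed index and the CPU-loaded model are served. As a hedged illustration only, a FastAPI endpoint along these lines could tie them together; the /generate route, Query schema, prompt template, top-k, and generation settings below are hypothetical and not taken from main.py, while index, tokenizer, and model refer to the objects built earlier in the file.

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class Query(BaseModel):
    question: str

@app.post("/generate")
def generate(query: Query):
    # Retrieve the most relevant problem/solution documents from the FAISS-backed index.
    retriever = index.as_retriever(similarity_top_k=2)
    nodes = retriever.retrieve(query.question)
    context = "\n\n".join(n.node.get_content() for n in nodes)

    # Build a simple prompt from the retrieved context and run CPU generation.
    prompt = f"Context:\n{context}\n\nQuestion: {query.question}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    output_ids = model.generate(**inputs, max_new_tokens=256)
    answer = tokenizer.decode(output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return {"answer": answer}

Generation on CPU is slow for long outputs, so keeping max_new_tokens small helps the endpoint stay responsive.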