Spaces: Runtime error
Update main.py
main.py CHANGED
@@ -1,5 +1,5 @@
 from fastapi import FastAPI
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from datasets import load_dataset
 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
@@ -43,7 +43,7 @@ login(hf_token)
 # Load Dataset and Prepare Knowledge Base
 ds = load_dataset("codeparrot/apps", "all", split="train")
 os.makedirs("knowledge_base", exist_ok=True)
-for i, example in enumerate(ds.select(range(
+for i, example in enumerate(ds.select(range(50))): # Reduced to 50 for memory
     solution = example['solutions'][0] if example['solutions'] else "No solution available"
     with open(f"knowledge_base/doc_{i}.txt", "w", encoding="utf-8") as f:
         f.write(f"### Problem\n{example['question']}\n\n### Solution\n{solution}")
@@ -57,20 +57,14 @@ faiss_index = faiss.IndexFlatL2(d)
 vector_store = FaissVectorStore(faiss_index=faiss_index)
 index = VectorStoreIndex.from_documents(documents, vector_store=vector_store)
 
-# Load LLaMA Model
+# Load LLaMA Model (without quantization, on CPU)
 model_name = "meta-llama/Llama-3.2-1B-Instruct"
-quant_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True
-)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-device = "
+device = "cpu" # Force CPU usage
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-
-
+    device_map="cpu", # Explicitly map to CPU
+    torch_dtype=torch.float32 # Use float32 for CPU compatibility
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
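
For reference, a consolidated sketch of the CPU loading path this commit switches to, assuming the gated meta-llama checkpoint is already accessible via the login(hf_token) call shown in the hunk context. The smoke-test prompt and max_new_tokens value are illustrative additions, not part of main.py.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B-Instruct"

# Load tokenizer and model entirely on the CPU, in full precision
# (no bitsandbytes / CUDA dependency).
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",           # keep all weights on the CPU
    torch_dtype=torch.float32,  # float32 avoids half-precision ops that are limited on CPU
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Quick smoke test: a short greedy generation to confirm the model runs on CPU.
inputs = tokenizer("Write a Python function that reverses a string.", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))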
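The diff does not show how the FAISS-backed index and the CPU-loaded model are served. As a hedged illustration only, a FastAPI endpoint along these lines could tie them together; the /generate route, Query schema, prompt template, top-k, and generation settings below are hypothetical and not taken from main.py, while index, tokenizer, and model refer to the objects built earlier in the file.

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class Query(BaseModel):
    question: str

@app.post("/generate")
def generate(query: Query):
    # Retrieve the most relevant problem/solution documents from the FAISS-backed index.
    retriever = index.as_retriever(similarity_top_k=2)
    nodes = retriever.retrieve(query.question)
    context = "\n\n".join(n.node.get_content() for n in nodes)

    # Build a simple prompt from the retrieved context and run CPU generation.
    prompt = f"Context:\n{context}\n\nQuestion: {query.question}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    output_ids = model.generate(**inputs, max_new_tokens=256)
    answer = tokenizer.decode(output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return {"answer": answer}

Generation on CPU is slow for long outputs, so keeping max_new_tokens small helps the endpoint stay responsive.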