Spaces:

pradeepsengarr
/

Custom_Rag_Bot

Running

App Files Files Community

pradeepsengarr committed on Jun 7

Commit

3529e03

verified ·

1 Parent(s): 541a176

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -27

app.py CHANGED Viewed

@@ -27,34 +27,49 @@ class DocumentRAG:
         self.is_indexed = False
     def setup_llm(self):
-        """Setup quantized Mistral model"""
-        try:
-            quantization_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_compute_dtype=torch.float16,
-                bnb_4bit_use_double_quant=True,
-                bnb_4bit_quant_type="nf4"
-            )
-            model_name = "mistralai/Mistral-7B-Instruct-v0.1"
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-            if self.tokenizer.pad_token is None:
-                self.tokenizer.pad_token = self.tokenizer.eos_token
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                quantization_config=quantization_config,
-                device_map="auto",
-                torch_dtype=torch.float16,
-                trust_remote_code=True
-            )
-            print("✅ Quantized Mistral model loaded")
-        except Exception as e:
-            print(f"❌ Error loading model: {e}")
-            # Fallback to a smaller model if Mistral fails
             self.setup_fallback_model()
     def setup_fallback_model(self):
         """Fallback to smaller model if Mistral fails"""

         self.is_indexed = False
     def setup_llm(self):
         """Set up the 4-bit quantized Mistral-7B-Instruct model and tokenizer.

         On success, ``self.tokenizer`` and ``self.model`` are populated.
         If CUDA is not available, or if any step of the loading raises,
         the method delegates to ``self.setup_fallback_model()`` instead of
         propagating the error.

         Returns:
             None. The outcome is reported through ``print`` messages and
             through the attributes set on ``self``.
         """
         # NOTE: the whole body must be indented one level under the ``def``;
         # with the body at the same column as the ``def`` the module fails
         # to import with an IndentationError.
         try:
             # Quantized loading is only attempted when CUDA is available.
             if not torch.cuda.is_available():
                 print("⚠️ CUDA not available, falling back to CPU or alternative model")
                 self.setup_fallback_model()
                 return

             quantization_config = BitsAndBytesConfig(
                 load_in_4bit=True,
                 bnb_4bit_compute_dtype=torch.float16,
                 bnb_4bit_use_double_quant=True,
                 bnb_4bit_quant_type="nf4",
             )
             model_name = "mistralai/Mistral-7B-Instruct-v0.1"

             # Load the tokenizer first.
             self.tokenizer = AutoTokenizer.from_pretrained(
                 model_name,
                 trust_remote_code=True,
             )
             # Reuse the EOS token for padding when the tokenizer defines no
             # pad token of its own.
             if self.tokenizer.pad_token is None:
                 self.tokenizer.pad_token = self.tokenizer.eos_token

             # Load the model with the quantization settings above.
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 quantization_config=quantization_config,
                 device_map="auto",
                 torch_dtype=torch.float16,
                 trust_remote_code=True,
                 low_cpu_mem_usage=True,  # Added for better memory management
             )
             print("✅ Quantized Mistral model loaded successfully")
         except Exception as e:
             # Deliberately broad: any failure while loading the primary model
             # is reported and handled by switching to the fallback model.
             print(f"❌ Error loading model: {e}")
             print("🔄 Falling back to alternative model...")
             self.setup_fallback_model()
     def setup_fallback_model(self):
         """Fallback to smaller model if Mistral fails"""