pradeepsengarr committed (verified)
Commit 3529e03 · 1 Parent(s): 541a176

Update app.py

Files changed (1): app.py (+42, -27)
app.py CHANGED

@@ -27,34 +27,49 @@ class DocumentRAG:
         self.is_indexed = False
 
     def setup_llm(self):
-        """Setup quantized Mistral model"""
-        try:
-            quantization_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_compute_dtype=torch.float16,
-                bnb_4bit_use_double_quant=True,
-                bnb_4bit_quant_type="nf4"
-            )
-
-            model_name = "mistralai/Mistral-7B-Instruct-v0.1"
-
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-            if self.tokenizer.pad_token is None:
-                self.tokenizer.pad_token = self.tokenizer.eos_token
-
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                quantization_config=quantization_config,
-                device_map="auto",
-                torch_dtype=torch.float16,
-                trust_remote_code=True
-            )
-            print("✅ Quantized Mistral model loaded")
-
-        except Exception as e:
-            print(f"❌ Error loading model: {e}")
-            # Fallback to a smaller model if Mistral fails
+        """Setup quantized Mistral model"""
+        try:
+            # Check if CUDA is available
+            if not torch.cuda.is_available():
+                print("⚠️ CUDA not available, falling back to CPU or alternative model")
                 self.setup_fallback_model()
+                return
+
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=torch.float16,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4"
+            )
+
+            model_name = "mistralai/Mistral-7B-Instruct-v0.1"
+
+            # Load tokenizer first
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_name,
+                trust_remote_code=True
+            )
+
+            # Fix padding token issue
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+
+            # Load model with quantization
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                quantization_config=quantization_config,
+                device_map="auto",
+                torch_dtype=torch.float16,
+                trust_remote_code=True,
+                low_cpu_mem_usage=True  # Added for better memory management
+            )
+
+            print("✅ Quantized Mistral model loaded successfully")
+
+        except Exception as e:
+            print(f"❌ Error loading model: {e}")
+            print("🔄 Falling back to alternative model...")
+            self.setup_fallback_model()
 
     def setup_fallback_model(self):
         """Fallback to smaller model if Mistral fails"""