null and void committed on
Commit 9012900 · verified · 1 Parent(s): 212c5a9

Update app.py

Files changed (1)
  1. app.py +21 -15
app.py CHANGED
@@ -2,7 +2,9 @@ import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import time
+import bitsandbytes as bnb
 
+print(f"bitsandbytes version: {bnb.__version__}")
 print(f"CUDA is available: {torch.cuda.is_available()}")
 print(f"CUDA device count: {torch.cuda.device_count()}")
 if torch.cuda.is_available():
@@ -23,27 +25,31 @@ class ConversationManager:
         if not model_name:
             print("Error: Empty model name provided")
             return None
-
+
         if model_name in self.models:
             return self.models[model_name]
 
         try:
             print(f"Attempting to load model: {model_name}")
             tokenizer = AutoTokenizer.from_pretrained(model_name)
-            try:
-                # Try to load the model with GPU support
-                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
-            except RuntimeError as e:
-                print(f"GPU loading failed, falling back to CPU: {e}")
-                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
-            self.models[model_name] = (model, tokenizer)
-            print(f"Successfully loaded model: {model_name}")
-            return self.models[model_name]
-        except Exception as e:
-            print(f"Failed to load model {model_name}: {e}")
-            print(f"Error type: {type(e).__name__}")
-            print(f"Error details: {str(e)}")
-            return None
+            try:
+                # Try to load the model with 8-bit quantization
+                model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
+            except RuntimeError as e:
+                print(f"8-bit quantization not available, falling back to full precision: {e}")
+                if torch.cuda.is_available():
+                    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
+                else:
+                    model = AutoModelForCausalLM.from_pretrained(model_name)
+
+            self.models[model_name] = (model, tokenizer)
+            print(f"Successfully loaded model: {model_name}")
+            return self.models[model_name]
+        except Exception as e:
+            print(f"Failed to load model {model_name}: {e}")
+            print(f"Error type: {type(e).__name__}")
+            print(f"Error details: {str(e)}")
+            return None
 
     def generate_response(self, model_name, prompt):
         model, tokenizer = self.load_model(model_name)
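
For quick local testing, the quantize-then-fall-back pattern from this commit can be reproduced outside the Space. A minimal sketch, assuming a recent transformers release (where 8-bit loading is requested via BitsAndBytesConfig rather than the bare load_in_8bit flag used in the commit); the model id is a placeholder, not the one this Space serves:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_NAME = "gpt2"  # placeholder; any causal-LM repo id works

def load_with_fallback(model_name: str):
    """Try 8-bit quantized loading first; fall back to full precision."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    try:
        # 8-bit quantization requires bitsandbytes and a CUDA device;
        # device_map="auto" additionally needs the accelerate package.
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        )
    except (ImportError, RuntimeError, ValueError) as e:
        print(f"8-bit quantization not available, falling back to full precision: {e}")
        if torch.cuda.is_available():
            model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
        else:
            model = AutoModelForCausalLM.from_pretrained(model_name)
    return model, tokenizer

model, tokenizer = load_with_fallback(MODEL_NAME)

Catching ImportError and ValueError alongside RuntimeError is deliberate: depending on the transformers/bitsandbytes versions installed, a missing or broken 8-bit path may surface as any of the three, and the commit's intent is to degrade to full precision rather than fail outright.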