Mettaton

Running

App Files Files Community

DragonProgrammer committed 14 days ago

Commit

c9f6a0e

verified ·

1 Parent(s): 1cd3b83

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -43

app.py CHANGED Viewed

@@ -67,53 +67,21 @@ class LangChainAgentWrapper:
     def __init__(self):
         print("Initializing LangChainAgentWrapper...")
-        model_id = "google/gemma-2b-it"
         try:
             hf_auth_token = os.getenv("HF_TOKEN")
-            if not hf_auth_token:
-                raise ValueError("HF_TOKEN secret is missing. It is required for downloading models.")
-            else:
-                print("HF_TOKEN secret found.")
-            # --- CORRECTED MODEL LOADING WITH QUANTIZATION ---
-            # 1. Create the 4-bit quantization configuration
-            print("Creating 4-bit quantization config...")
-            quantization_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_use_double_quant=True,
-                bnb_4bit_compute_dtype=torch.bfloat16
-            )
-            print("Quantization config created.")
-            # 2. Load the tokenizer separately
-            print(f"Loading tokenizer for: {model_id}")
-            tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_auth_token)
-            print("Tokenizer loaded successfully.")
-            # 3. Load the model with the quantization config
-            print(f"Loading model '{model_id}' with quantization...")
-            model = AutoModelForCausalLM.from_pretrained(
-                model_id,
-                quantization_config=quantization_config,
-                device_map="auto",
-                token=hf_auth_token
-            )
-            print("Model loaded successfully.")
-            # 4. Create the Hugging Face pipeline using the pre-loaded model and tokenizer
-            print("Creating text-generation pipeline...")
             llm_pipeline = transformers.pipeline(
-                "text-generation",
-                model=model,
-                tokenizer=tokenizer,
-                max_new_tokens=512 # Add max_new_tokens to prevent overly long responses
             )
-            print("Model pipeline created successfully.")
-            # --- END OF CORRECTION ---
             # Wrap the pipeline in a LangChain LLM object
             self.llm = HuggingFacePipeline(pipeline=llm_pipeline)
@@ -181,7 +149,7 @@ class LangChainAgentWrapper:
             print(f"ERROR: LangChain agent execution failed: {e}")
             traceback.print_exc()
             return f"Agent Error: Failed to process the question. Details: {e}"
 # --- Main Evaluation Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """

     def __init__(self):
         print("Initializing LangChainAgentWrapper...")
+        # Switched to a smaller, CPU-friendly instruction-tuned model
+        model_id = "google/flan-t5-base"
         try:
             hf_auth_token = os.getenv("HF_TOKEN")
+            print(f"Loading model pipeline for: {model_id}")
+            # For FLAN-T5, we use the "text2text-generation" task.
+            # We also remove quantization as it's not needed for this smaller model.
             llm_pipeline = transformers.pipeline(
+                "text2text-generation", # <<< IMPORTANT: Changed task for T5 models
+                model=model_id,
+                device_map="auto"
             )
+            print("Model pipeline loaded successfully.")
             # Wrap the pipeline in a LangChain LLM object
             self.llm = HuggingFacePipeline(pipeline=llm_pipeline)
             print(f"ERROR: LangChain agent execution failed: {e}")
             traceback.print_exc()
             return f"Agent Error: Failed to process the question. Details: {e}"
 # --- Main Evaluation Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """