Mettaton

Running

App Files Community

DragonProgrammer committed 19 days ago

Commit

1cd3b83

verified ·

1 Parent(s): bac1dc6

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -13

app.py CHANGED Viewed

@@ -10,6 +10,8 @@ import re
 import requests
 import traceback
 import sys
 # --- LangChain and new Transformers imports ---
 from langchain.agents import AgentExecutor, create_react_agent
@@ -61,27 +63,57 @@ def safe_calculator_func(expression: str) -> str:
         return f"Error calculating '{expression}': Invalid expression or calculation error ({e})."
-# --- LangChain Agent Definition ---
 class LangChainAgentWrapper:
     def __init__(self):
         print("Initializing LangChainAgentWrapper...")
-        # --- CHANGE 1: Switched to a smaller, CPU-friendly model ---
-        model_id = "google/flan-t5-base"
         try:
-            hf_auth_token = os.getenv("HF_TOKEN") # Good practice to keep, but not needed for FLAN-T5
-            # --- CHANGE 2 & 3: Use the correct task for T5 and remove quantization ---
-            # We no longer need to load the tokenizer and model separately,
-            # as we are not applying a custom quantization config.
-            print(f"Loading model pipeline for: {model_id}")
             llm_pipeline = transformers.pipeline(
-                "text2text-generation", # <<< IMPORTANT: Changed task for T5 models
-                model=model_id,
-                device_map="auto"
             )
-            print("Model pipeline loaded successfully.")
             # Wrap the pipeline in a LangChain LLM object
             self.llm = HuggingFacePipeline(pipeline=llm_pipeline)

 import requests
 import traceback
 import sys
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 # --- LangChain and new Transformers imports ---
 from langchain.agents import AgentExecutor, create_react_agent
         return f"Error calculating '{expression}': Invalid expression or calculation error ({e})."
 class LangChainAgentWrapper:
     def __init__(self):
         print("Initializing LangChainAgentWrapper...")
+        model_id = "google/gemma-2b-it"
         try:
+            hf_auth_token = os.getenv("HF_TOKEN")
+            if not hf_auth_token:
+                raise ValueError("HF_TOKEN secret is missing. It is required for downloading models.")
+            else:
+                print("HF_TOKEN secret found.")
+            # --- CORRECTED MODEL LOADING WITH QUANTIZATION ---
+            # 1. Create the 4-bit quantization configuration
+            print("Creating 4-bit quantization config...")
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_compute_dtype=torch.bfloat16
+            )
+            print("Quantization config created.")
+            # 2. Load the tokenizer separately
+            print(f"Loading tokenizer for: {model_id}")
+            tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_auth_token)
+            print("Tokenizer loaded successfully.")
+            # 3. Load the model with the quantization config
+            print(f"Loading model '{model_id}' with quantization...")
+            model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                quantization_config=quantization_config,
+                device_map="auto",
+                token=hf_auth_token
+            )
+            print("Model loaded successfully.")
+            # 4. Create the Hugging Face pipeline using the pre-loaded model and tokenizer
+            print("Creating text-generation pipeline...")
             llm_pipeline = transformers.pipeline(
+                "text-generation",
+                model=model,
+                tokenizer=tokenizer,
+                max_new_tokens=512 # Add max_new_tokens to prevent overly long responses
             )
+            print("Model pipeline created successfully.")
+            # --- END OF CORRECTION ---
             # Wrap the pipeline in a LangChain LLM object
             self.llm = HuggingFacePipeline(pipeline=llm_pipeline)