DragonProgrammer committed on
Commit c9f6a0e · verified · 1 Parent(s): 1cd3b83

Update app.py

Files changed (1)
  1. app.py +11 -43
app.py CHANGED
@@ -67,53 +67,21 @@ class LangChainAgentWrapper:
     def __init__(self):
         print("Initializing LangChainAgentWrapper...")

-        model_id = "google/gemma-2b-it"
+        # Switched to a smaller, CPU-friendly instruction-tuned model
+        model_id = "google/flan-t5-base"

         try:
             hf_auth_token = os.getenv("HF_TOKEN")
-            if not hf_auth_token:
-                raise ValueError("HF_TOKEN secret is missing. It is required for downloading models.")
-            else:
-                print("HF_TOKEN secret found.")
-
-            # --- CORRECTED MODEL LOADING WITH QUANTIZATION ---
-
-            # 1. Create the 4-bit quantization configuration
-            print("Creating 4-bit quantization config...")
-            quantization_config = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_quant_type="nf4",
-                bnb_4bit_use_double_quant=True,
-                bnb_4bit_compute_dtype=torch.bfloat16
-            )
-            print("Quantization config created.")
-
-            # 2. Load the tokenizer separately
-            print(f"Loading tokenizer for: {model_id}")
-            tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_auth_token)
-            print("Tokenizer loaded successfully.")
-
-            # 3. Load the model with the quantization config
-            print(f"Loading model '{model_id}' with quantization...")
-            model = AutoModelForCausalLM.from_pretrained(
-                model_id,
-                quantization_config=quantization_config,
-                device_map="auto",
-                token=hf_auth_token
-            )
-            print("Model loaded successfully.")
-
-            # 4. Create the Hugging Face pipeline using the pre-loaded model and tokenizer
-            print("Creating text-generation pipeline...")
+            print(f"Loading model pipeline for: {model_id}")
+
+            # For FLAN-T5, we use the "text2text-generation" task.
+            # We also remove quantization as it's not needed for this smaller model.
             llm_pipeline = transformers.pipeline(
-                "text-generation",
-                model=model,
-                tokenizer=tokenizer,
-                max_new_tokens=512  # Add max_new_tokens to prevent overly long responses
+                "text2text-generation",  # <<< IMPORTANT: Changed task for T5 models
+                model=model_id,
+                device_map="auto"
             )
-            print("Model pipeline created successfully.")
-
-            # --- END OF CORRECTION ---
+            print("Model pipeline loaded successfully.")

             # Wrap the pipeline in a LangChain LLM object
             self.llm = HuggingFacePipeline(pipeline=llm_pipeline)
@@ -181,7 +149,7 @@ class LangChainAgentWrapper:
             print(f"ERROR: LangChain agent execution failed: {e}")
             traceback.print_exc()
             return f"Agent Error: Failed to process the question. Details: {e}"
-
+
 # --- Main Evaluation Logic ---
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     """