DragonProgrammer committed
Commit 765e810 · verified · 1 parent: dd964ab

Update app.py

Files changed (1): app.py (+40 -18)
app.py CHANGED
@@ -78,31 +78,55 @@ class LangChainAgentWrapper:
     def __init__(self):
         print("Initializing LangChainAgentWrapper...")

-        # Switched to a smaller, CPU-friendly instruction-tuned model
-        model_id = "google/flan-t5-base"
+        # We will use the more powerful gemma-2b-it model, but load it in 4-bit.
+        model_id = "google/gemma-2b-it"

         try:
             hf_auth_token = os.getenv("HF_TOKEN")
-            print(f"Loading model pipeline for: {model_id}")
-
-            # We load the model and tokenizer objects first
-            tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
-            model = transformers.AutoModelForSeq2SeqLM.from_pretrained(model_id)
+            if not hf_auth_token:
+                raise ValueError("HF_TOKEN secret is missing. It is required for downloading models.")
+            else:
+                print("HF_TOKEN secret found.")
+
+            # 1. Create the 4-bit quantization configuration.
+            print("Creating 4-bit quantization config...")
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_compute_dtype=torch.bfloat16
+            )
+            print("Quantization config created.")
+
+            # 2. Load the tokenizer.
+            print(f"Loading tokenizer for: {model_id}")
+            tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_auth_token)
+            print("Tokenizer loaded successfully.")
+
+            # 3. Load the model with the quantization config.
+            print(f"Loading model '{model_id}' with quantization...")
+            model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                quantization_config=quantization_config,
+                device_map="auto",
+                token=hf_auth_token
+            )
+            print("Model loaded successfully.")

-            # Now we use our custom FlanT5Pipeline class
-            llm_pipeline = FlanT5Pipeline(
-                task="text2text-generation",
+            # 4. Create the Hugging Face pipeline using the pre-loaded model and tokenizer.
+            print("Creating text-generation pipeline...")
+            llm_pipeline = transformers.pipeline(
+                "text-generation",  # Use "text-generation" for Gemma
                 model=model,
                 tokenizer=tokenizer,
-                device_map="auto",
-                max_new_tokens=512
+                max_new_tokens=512  # Add max_new_tokens to prevent overly long responses
             )
-            print("Model pipeline loaded successfully.")
+            print("Model pipeline created successfully.")

             # Wrap the pipeline in a LangChain LLM object
             self.llm = HuggingFacePipeline(pipeline=llm_pipeline)

-            # Define the list of LangChain tools (this part is unchanged and correct)
+            # Define the list of LangChain tools (this part is correct)
             self.tools = [
                 Tool(
                     name="get_current_time_in_timezone",
@@ -118,7 +142,7 @@ class LangChainAgentWrapper:
             ]
             print(f"Tools prepared for agent: {[tool.name for tool in self.tools]}")

-            # Create the ReAct agent prompt (this part is unchanged and correct)
+            # Create the ReAct agent prompt (this part is correct)
             react_prompt = PromptTemplate.from_template(
                 """
                 You are a helpful assistant. Answer the following questions as best you can.
@@ -144,7 +168,7 @@ class LangChainAgentWrapper:
                 """
             )

-            # Create the agent and executor (this part is unchanged and correct)
+            # Create the agent and executor (this part is correct)
             agent = create_react_agent(self.llm, self.tools, react_prompt)
             self.agent_executor = AgentExecutor(agent=agent, tools=self.tools, verbose=True, handle_parsing_errors=True)
             print("LangChain agent created successfully.")
@@ -157,9 +181,7 @@ class LangChainAgentWrapper:
     def __call__(self, question: str) -> str:
        print(f"\n--- LangChainAgentWrapper received question: {question[:100]}... ---")
        try:
-            # Invoke the agent executor
            response = self.agent_executor.invoke({"input": question})
-            # The answer is in the 'output' key of the response dictionary
            return response.get("output", "No output found.")
        except Exception as e:
            print(f"ERROR: LangChain agent execution failed: {e}")
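The new hunk calls torch, BitsAndBytesConfig, AutoTokenizer, AutoModelForCausalLM, and transformers.pipeline, but the import block sits outside the diff context. Below is a minimal standalone sketch of the loading path this commit switches to, assuming a transformers release with Gemma support plus the accelerate and bitsandbytes packages, and an HF_TOKEN authorized for the gated google/gemma-2b-it checkpoint:

# Standalone sketch of the commit's 4-bit loading path; the imports are
# assumptions inferred from the diff, since app.py's import block is not shown.
import os

import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "google/gemma-2b-it"
hf_auth_token = os.getenv("HF_TOKEN")  # must be set and authorized for the gated model

# NF4 4-bit weights with double quantization and bfloat16 compute, matching the diff.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_auth_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",
    token=hf_auth_token,
)

llm_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
)
print(llm_pipeline("Briefly explain 4-bit quantization.")[0]["generated_text"])

One caveat: bitsandbytes 4-bit loading generally expects a CUDA GPU, so this path will likely fail at load time on a CPU-only Space, whereas the flan-t5-base setup it replaces was chosen to be CPU-friendly.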
 
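For reference, a hypothetical smoke test of the updated wrapper; LangChainAgentWrapper and its get_current_time_in_timezone tool come from app.py, while the question itself is illustrative:

# Hypothetical usage of the wrapper defined in app.py.
agent = LangChainAgentWrapper()  # loads gemma-2b-it in 4-bit and builds the ReAct agent
answer = agent("What is the current time in Europe/London?")
print(answer)  # the 'output' value from agent_executor.invoke, or "No output found."

One design note: a "text-generation" pipeline returns the prompt together with the completion by default, which can confuse ReAct output parsing; passing return_full_text=False to transformers.pipeline is a common mitigation, though this commit does not apply it.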