DragonProgrammer committed · Commit 7dc08c7 · verified · 1 Parent(s): c9f6a0e

Update app.py

Files changed (1):
  app.py +15 -8
app.py CHANGED
@@ -67,20 +67,27 @@ class LangChainAgentWrapper:
     def __init__(self):
         print("Initializing LangChainAgentWrapper...")
 
-        # Switched to a smaller, CPU-friendly instruction-tuned model
         model_id = "google/flan-t5-base"
 
         try:
-            hf_auth_token = os.getenv("HF_TOKEN")
             print(f"Loading model pipeline for: {model_id}")
 
-            # For FLAN-T5, we use the "text2text-generation" task.
-            # We also remove quantization as it's not needed for this smaller model.
-            llm_pipeline = transformers.pipeline(
-                "text2text-generation",  # <<< IMPORTANT: Changed task for T5 models
-                model=model_id,
-                device_map="auto"
+            # --- MODIFICATION: Use the custom pipeline class ---
+            # Load the tokenizer first
+            tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
+            # Load the model
+            model = transformers.AutoModelForSeq2SeqLM.from_pretrained(model_id)
+
+            # Create an instance of our custom pipeline
+            llm_pipeline = FlanT5Pipeline(
+                task="text2text-generation",
+                model=model,
+                tokenizer=tokenizer,
+                device_map="auto",
+                max_new_tokens=512  # Add max_new_tokens to control output length
             )
+            # --- END MODIFICATION ---
+
             print("Model pipeline loaded successfully.")
 
             # Wrap the pipeline in a LangChain LLM object
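
The hunk calls FlanT5Pipeline, a custom class defined elsewhere in app.py and not shown in this diff. Below is a minimal sketch, assuming the class subclasses transformers' Text2TextGenerationPipeline and that the trailing "Wrap the pipeline in a LangChain LLM object" comment refers to langchain_community's HuggingFacePipeline; the postprocess override and variable names are illustrative guesses, not the repository's actual code.

# Sketch only: FlanT5Pipeline is not shown in this diff. Everything
# below is an assumption about what the custom class might do.
import transformers
from transformers import Text2TextGenerationPipeline
from langchain_community.llms import HuggingFacePipeline


class FlanT5Pipeline(Text2TextGenerationPipeline):
    """Hypothetical custom pipeline for FLAN-T5: trims whitespace from
    completions so a LangChain agent's output parser sees clean text."""

    def postprocess(self, model_outputs, **kwargs):
        # Reuse the stock text2text postprocessing, then strip the result.
        records = super().postprocess(model_outputs, **kwargs)
        for record in records:
            if "generated_text" in record:
                record["generated_text"] = record["generated_text"].strip()
        return records


model_id = "google/flan-t5-base"
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
model = transformers.AutoModelForSeq2SeqLM.from_pretrained(model_id)

llm_pipeline = FlanT5Pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,  # cap output length, as in the commit
)

# "Wrap the pipeline in a LangChain LLM object" would typically mean:
llm = HuggingFacePipeline(pipeline=llm_pipeline)

One design note: device_map="auto" is an argument of the transformers.pipeline() factory, which forwards it to from_pretrained during model loading. When a Pipeline subclass is instantiated directly, device placement is normally handled by passing device_map to from_pretrained on the model itself, which is why the sketch omits it from the constructor call.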