Update knowledge_engine.py
knowledge_engine.py  CHANGED  +23 -18
@@ -14,6 +14,9 @@ from langchain_community.llms import HuggingFacePipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, pipeline
+from langchain.llms import HuggingFacePipeline
+
 class CPULLMProvider:
     """CPU-based LLM provider using HuggingFace models"""
 
@@ -23,10 +26,10 @@ class CPULLMProvider:
         self.is_available = False
         self.current_model = None
 
-        # CPU-friendly models
+        # CPU-friendly models
         self.cpu_models = [
-            "
-            "distilbert/distilgpt2"
+            "google/flan-t5-small",   # Encoder-decoder model
+            "distilbert/distilgpt2"   # Decoder-only (GPT-style)
         ]
 
     def initialize(self) -> bool:
@@ -36,49 +39,50 @@ class CPULLMProvider:
                 print(f"[i] Trying to load {model_id}...")
 
                 tokenizer = AutoTokenizer.from_pretrained(model_id)
-
+
+                # Detect model type based on name
+                if "flan" in model_id or "t5" in model_id:
+                    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+                    task = "text2text-generation"
+                else:
+                    model = AutoModelForCausalLM.from_pretrained(model_id)
+                    task = "text-generation"
 
                 pipe = pipeline(
-
+                    task,
                     model=model,
                     tokenizer=tokenizer,
                     max_new_tokens=256,
                     temperature=0.3,
                     top_p=0.95,
-                    device="cpu"
+                    device="cpu"
                 )
 
                 self.llm = HuggingFacePipeline(pipeline=pipe)
                 self.current_model = model_id
                 self.is_available = True
 
-                # Test
-                test_response = self.invoke("Hello")
+                # Test model
+                test_response = self.invoke("Hello, who are you?")
                 if test_response and len(test_response) > 0:
                     print(f"[✓] Successfully loaded {model_id}")
                     return True
 
             except Exception as e:
-                print(f"[!] Failed to load {model_id}: {str(e)[:
+                print(f"[!] Failed to load {model_id}: {str(e)[:200]}...")
                 continue
 
         print("[!] All CPU models failed to load")
         return False
 
     def invoke(self, prompt: str) -> str:
-        """Invoke the CPU model with
+        """Invoke the CPU model with prompt"""
         if not self.llm:
             raise Exception("CPU LLM not initialized")
 
         try:
-            #
-
-                formatted_prompt = f"Instruct: {prompt}\nOutput:"
-            elif "llama" in self.current_model.lower():
-                formatted_prompt = f"<|user|>\n{prompt}\n<|assistant|>\n"
-            else:
-                formatted_prompt = prompt
-
+            # Optionally modify prompt for specific models if needed
+            formatted_prompt = prompt
             response = self.llm.invoke(formatted_prompt)
             return response.strip()
         except Exception as e:
@@ -86,6 +90,7 @@ class CPULLMProvider:
             raise
 
 
+
 class KnowledgeManager:
     def __init__(self):
         self.temp_dir = tempfile.mkdtemp()
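
For context, below is a minimal, self-contained sketch of the loading pattern this commit introduces: pick the AutoModel class and pipeline task from the model id, then wrap the pipeline for LangChain. The two model ids and the generation settings mirror the diff; the helper name load_cpu_llm, the __main__ demo, and the choice of the langchain_community import path (the one already visible in the hunk header) are assumptions of mine, not code from the Space.

# Sketch: choose the AutoModel class and pipeline task per model family.
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    pipeline,
)
from langchain_community.llms import HuggingFacePipeline


def load_cpu_llm(model_id: str) -> HuggingFacePipeline:
    """Load a small model on CPU and wrap it for LangChain (illustrative helper)."""
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Encoder-decoder models such as Flan-T5 need AutoModelForSeq2SeqLM and
    # the "text2text-generation" task; decoder-only (GPT-style) models such
    # as distilgpt2 need AutoModelForCausalLM and "text-generation".
    if "flan" in model_id or "t5" in model_id:
        model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        task = "text2text-generation"
    else:
        model = AutoModelForCausalLM.from_pretrained(model_id)
        task = "text-generation"

    pipe = pipeline(
        task,
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,   # generation settings copied from the diff
        temperature=0.3,
        top_p=0.95,
        device="cpu",
    )
    return HuggingFacePipeline(pipeline=pipe)


if __name__ == "__main__":
    # Demo prompt is illustrative; flan-t5-small follows plain instructions.
    llm = load_cpu_llm("google/flan-t5-small")
    print(llm.invoke("Answer briefly: what is a CPU?"))

Branching on the model id is what lets both families coexist in self.cpu_models: google/flan-t5-small cannot be loaded through AutoModelForCausalLM, and distilbert/distilgpt2 is not a seq2seq model, so a single hard-coded task would break one of the two fallbacks.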