aamirhameed committed on
Commit e67deaf · verified · 1 Parent(s): 6a55455

Update knowledge_engine.py: swap microsoft/phi for google/flan-t5-small and pick the pipeline task from the model type

Files changed (1)
  1. knowledge_engine.py +23 -18
knowledge_engine.py CHANGED
@@ -14,6 +14,9 @@ from langchain_community.llms import HuggingFacePipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM, pipeline
+from langchain.llms import HuggingFacePipeline
+
 class CPULLMProvider:
     """CPU-based LLM provider using HuggingFace models"""
 
@@ -23,10 +26,10 @@ class CPULLMProvider:
         self.is_available = False
         self.current_model = None
 
-        # CPU-friendly models (small enough to run on CPU)
+        # CPU-friendly models
         self.cpu_models = [
-            "microsoft/phi",         # 2.7B params, good for CPU
-            "distilbert/distilgpt2"  # Very small 82M params
+            "google/flan-t5-small",  # Encoder-decoder model
+            "distilbert/distilgpt2"  # Decoder-only (GPT-style)
         ]
 
     def initialize(self) -> bool:
@@ -36,49 +39,50 @@ class CPULLMProvider:
                 print(f"[i] Trying to load {model_id}...")
 
                 tokenizer = AutoTokenizer.from_pretrained(model_id)
-                model = AutoModelForCausalLM.from_pretrained(model_id)
+
+                # Detect model type based on name
+                if "flan" in model_id or "t5" in model_id:
+                    model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+                    task = "text2text-generation"
+                else:
+                    model = AutoModelForCausalLM.from_pretrained(model_id)
+                    task = "text-generation"
 
                 pipe = pipeline(
-                    "text-generation",
+                    task,
                     model=model,
                     tokenizer=tokenizer,
                     max_new_tokens=256,
                     temperature=0.3,
                     top_p=0.95,
-                    device="cpu"  # Force CPU usage
+                    device="cpu"
                 )
 
                 self.llm = HuggingFacePipeline(pipeline=pipe)
                 self.current_model = model_id
                 self.is_available = True
 
-                # Test the model
-                test_response = self.invoke("Hello")
+                # Test model
+                test_response = self.invoke("Hello, who are you?")
                 if test_response and len(test_response) > 0:
                     print(f"[✓] Successfully loaded {model_id}")
                     return True
 
             except Exception as e:
-                print(f"[!] Failed to load {model_id}: {str(e)[:100]}...")
+                print(f"[!] Failed to load {model_id}: {str(e)[:200]}...")
                 continue
 
         print("[!] All CPU models failed to load")
         return False
 
     def invoke(self, prompt: str) -> str:
-        """Invoke the CPU model with optimized prompt"""
+        """Invoke the CPU model with prompt"""
         if not self.llm:
             raise Exception("CPU LLM not initialized")
 
         try:
-            # Format prompt based on model
-            if "phi" in self.current_model.lower():
-                formatted_prompt = f"Instruct: {prompt}\nOutput:"
-            elif "llama" in self.current_model.lower():
-                formatted_prompt = f"<|user|>\n{prompt}\n<|assistant|>\n"
-            else:
-                formatted_prompt = prompt
-
+            # Optionally modify prompt for specific models if needed
+            formatted_prompt = prompt
             response = self.llm.invoke(formatted_prompt)
             return response.strip()
         except Exception as e:
@@ -86,6 +90,7 @@ class CPULLMProvider:
             raise
 
 
+
 class KnowledgeManager:
     def __init__(self):
         self.temp_dir = tempfile.mkdtemp()
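
For reference, the loading path this commit introduces, as a self-contained sketch: detect whether a checkpoint is encoder-decoder or decoder-only, pick the matching pipeline task, and wrap the pipeline for LangChain. Model IDs and generation settings mirror the diff; the helper name build_cpu_llm is illustrative, and the sketch assumes transformers and langchain-community are installed (it imports HuggingFacePipeline from langchain_community, matching the file's existing import, rather than the langchain.llms path the diff also adds).

# Minimal sketch of the commit's loading logic, outside the class.
# Assumes: pip install transformers langchain-community
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    pipeline,
)
from langchain_community.llms import HuggingFacePipeline

def build_cpu_llm(model_id: str) -> HuggingFacePipeline:
    """Illustrative helper: load model_id on CPU and wrap it for LangChain."""
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # T5-family checkpoints are encoder-decoder and need the
    # text2text-generation task; GPT-style checkpoints are decoder-only
    # and use text-generation, as in the diff's initialize().
    if "flan" in model_id or "t5" in model_id:
        model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        task = "text2text-generation"
    else:
        model = AutoModelForCausalLM.from_pretrained(model_id)
        task = "text-generation"

    pipe = pipeline(
        task,
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=256,
        temperature=0.3,
        top_p=0.95,
        device="cpu",  # force CPU inference, matching the diff
    )
    return HuggingFacePipeline(pipeline=pipe)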
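
And a hypothetical smoke test of the provider itself, assuming the rest of the file matches the hunks above (initialize() walks cpu_models in order, falling back from google/flan-t5-small to distilbert/distilgpt2):

provider = CPULLMProvider()
if provider.initialize():
    # flan-t5-small responds best to instruction-style prompts
    print(provider.invoke("Translate to French: Hello, world."))
else:
    print("[!] No CPU model could be loaded")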