Schmitz005 commited on
Commit
f2b5dca
·
verified ·
1 Parent(s): 9ecdbea

Delete whalecore

Browse files
whalecore/__init__.py DELETED
File without changes
whalecore/agents.py DELETED
@@ -1,36 +0,0 @@
1
- import yaml
2
-
3
class Agent:
    """A named chat persona loaded from YAML configuration.

    Holds the agent's display name, persona description, and standing
    instructions; `chat` produces a canned reply until a real LLM backend
    is wired in.
    """

    def __init__(self, name, persona, instructions):
        self.name = name
        self.persona = persona
        self.instructions = instructions

    def chat(self, message):
        # Placeholder logic — replace with real LLM call later
        preview = message[:260]
        return f"🧠 {self.name} says:\n{self.instructions}\n\n{self.persona}\n\nYou said: {preview}..."
12
-
13
def load_agents(config_path="config.yaml"):
    """Build Agent objects from a YAML config file.

    The file must contain a top-level ``agents:`` list; each entry is a
    mapping with ``name``, ``persona`` and ``instructions`` keys.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        list[Agent]: One Agent per entry under the ``agents`` key.

    Raises:
        ValueError: If the YAML root is not a mapping or lacks ``agents``.
        KeyError: If an agent entry is missing a required field.
    """
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)

    # Validate with real exceptions: `assert` statements are stripped under
    # `python -O`, which would silently skip this checking. (Note: callers
    # that caught AssertionError must now catch ValueError.)
    if not isinstance(config, dict):
        raise ValueError("YAML must contain a top-level 'agents:' key")
    if 'agents' not in config:
        raise ValueError("Missing 'agents' key in YAML file")

    # The previous debug print dumped the whole config (potentially including
    # secrets) to stdout; removed.
    return [
        Agent(
            name=agent_conf['name'],
            persona=agent_conf['persona'],
            instructions=agent_conf['instructions'],
        )
        for agent_conf in config['agents']
    ]
31
-
32
def run_agents_on_text(agent_list, text):
    """Fan the same text out to every agent; map each agent's name to its reply."""
    return {agent.name: agent.chat(text) for agent in agent_list}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
whalecore/parser.py DELETED
@@ -1,49 +0,0 @@
1
- import os
2
- import PyPDF2
3
- import whisper
4
- from pydub import AudioSegment
5
- from sentence_transformers import SentenceTransformer
6
-
7
- import warnings
8
- warnings.filterwarnings(
9
- "ignore",
10
- category=FutureWarning,
11
- message="`clean_up_tokenization_spaces` was not set.*"
12
- )
13
- model = SentenceTransformer('all-MiniLM-L6-v2')
14
-
15
def parse_pdf(filepath):
    """Extract text from every page of a PDF, one page per newline-terminated run.

    Args:
        filepath: Path to a PDF file.

    Returns:
        str: Concatenated page texts, each followed by a newline; "" for a
        zero-page document.
    """
    with open(filepath, 'rb') as f:
        reader = PyPDF2.PdfReader(f)
        # extract_text() may return None (e.g. image-only pages); coerce to ""
        # so concatenation cannot raise TypeError. join avoids quadratic `+=`.
        pages = [(page.extract_text() or "") for page in reader.pages]
    return "".join(p + "\n" for p in pages)
22
-
23
def parse_audio(filepath):
    """Transcribe an audio file with Whisper and return the transcript text.

    Args:
        filepath: Path to an audio file Whisper can read (e.g. mp3/wav/m4a).

    Returns:
        str: The transcribed text.
    """
    # Loading the "base" checkpoint is expensive; cache it on the function so
    # repeated calls do not reload it. Using a distinct name also stops
    # shadowing the module-level SentenceTransformer `model`.
    asr = getattr(parse_audio, "_asr_model", None)
    if asr is None:
        asr = whisper.load_model("base")
        parse_audio._asr_model = asr
    result = asr.transcribe(filepath)
    return result['text']
27
-
28
def parse_text(filepath):
    """Read a plain-text file and return its full contents.

    Opens with an explicit UTF-8 encoding so results do not depend on the
    platform's locale default (previously the file was opened with the
    default encoding, which varies across systems).

    Args:
        filepath: Path to a text file.

    Returns:
        str: The file contents.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        return f.read()
31
-
32
def parse_file(filepath):
    """Dispatch a file to the matching parser based on its extension.

    Supported: .pdf (PyPDF2), .mp3/.wav/.m4a (Whisper), .txt (plain read).

    Raises:
        ValueError: For any other extension.
    """
    # Guard-clause dispatch; suffix matching is case-sensitive, as before.
    if filepath.endswith('.pdf'):
        return parse_pdf(filepath)
    if filepath.endswith(('.mp3', '.wav', '.m4a')):
        return parse_audio(filepath)
    if filepath.endswith('.txt'):
        return parse_text(filepath)
    raise ValueError(f"Unsupported file type: {filepath}")
41
-
42
def chunk_text(text, chunk_size=300):
    """Split text into strings of at most `chunk_size` whitespace-separated words."""
    tokens = text.split()
    chunks = []
    for start in range(0, len(tokens), chunk_size):
        chunks.append(' '.join(tokens[start:start + chunk_size]))
    return chunks
45
-
46
def chunk_and_embed(text):
    """Chunk `text` and embed each chunk; return [(chunk, embedding), ...]."""
    pieces = chunk_text(text)
    vectors = model.encode(pieces).tolist()
    return [(piece, vec) for piece, vec in zip(pieces, vectors)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
whalecore/rag.py DELETED
@@ -1,37 +0,0 @@
1
- from sentence_transformers import SentenceTransformer
2
- from pymongo import MongoClient
3
- import numpy as np
4
-
5
# Embedding model shared by every function in this module; loaded at import.
model = SentenceTransformer('all-MiniLM-L6-v2')
# Mongo handles. MongoClient() with no URI connects to localhost:27017 —
# NOTE(review): confirm this matches the deployment ($vectorSearch below
# presumably requires MongoDB Atlas; verify).
client = MongoClient()
db = client['huggingwhale']
collection = db['docs']
9
-
10
def chunk_text(text, chunk_size=300):
    """Break `text` into strings of at most `chunk_size` words each."""
    words = text.split()
    pieces = []
    i = 0
    while i < len(words):
        pieces.append(' '.join(words[i:i + chunk_size]))
        i += chunk_size
    return pieces
13
-
14
def embed_chunks(chunks):
    """Encode text chunks into embedding vectors as plain Python lists."""
    encoded = model.encode(chunks)
    return encoded.tolist()
16
-
17
def store_embeddings(chunks, embeddings):
    """Persist (chunk, embedding) pairs into the MongoDB collection.

    Args:
        chunks: Iterable of text chunks.
        embeddings: Iterable of embedding vectors, parallel to `chunks`.
    """
    docs = [
        {"chunk": chunk, "embedding": emb}
        for chunk, emb in zip(chunks, embeddings)
    ]
    # pymongo's insert_many raises InvalidOperation on an empty document
    # list, so skip the insert when there is nothing to store.
    if docs:
        collection.insert_many(docs)
23
-
24
def query_rag(question, top_k=3):
    """Return the stored chunks most similar to `question` via $vectorSearch.

    Args:
        question: Natural-language query string.
        top_k: Number of chunks to return.

    Returns:
        list[str]: The matching chunk texts, best first.
    """
    # model.encode returns a numpy array; BSON cannot serialize
    # numpy.ndarray, so convert to a plain list before sending it
    # in the aggregation pipeline.
    question_vec = model.encode([question])[0].tolist()
    pipeline = [
        {
            "$vectorSearch": {
                "index": "default",
                "path": "embedding",
                "queryVector": question_vec,
                "numCandidates": 100,
                "limit": top_k
            }
        }
    ]
    results = collection.aggregate(pipeline)
    return [doc['chunk'] for doc in results]