Hugging Face Spaces — build status: Runtime error.
Commit: "Update processing.py" — file processing.py changed (+50 −14).
Diff of processing.py follows.
@@ -1,13 +1,48 @@
|
|
1 |
-
# processing.py
|
2 |
from langchain.schema import HumanMessage
|
3 |
from output_parser import attachment_parser, bigfive_parser, personality_parser
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
|
|
|
|
5 |
|
6 |
def load_text(file_path: str) -> str:
|
7 |
with open(file_path, 'r', encoding='utf-8') as file:
|
8 |
return file.read().strip()
|
9 |
|
10 |
-
|
11 |
def truncate_text(text: str, max_tokens: int = 10000) -> str:
|
12 |
words = text.split()
|
13 |
if len(words) > max_tokens:
|
@@ -17,15 +52,19 @@ def truncate_text(text: str, max_tokens: int = 10000) -> str:
|
|
17 |
print(f"Text not truncated, contains {len(words)} words")
|
18 |
return text
|
19 |
|
20 |
-
|
21 |
-
def process_task(llm, input_text: str, general_task: str, specific_task: str, knowledge: str, output_parser):
|
22 |
truncated_input = truncate_text(input_text)
|
23 |
|
|
|
|
|
|
|
|
|
|
|
24 |
prompt = f"""{general_task}
|
25 |
|
26 |
{specific_task}
|
27 |
|
28 |
-
Knowledge: {
|
29 |
|
30 |
Input: {truncated_input}
|
31 |
|
@@ -44,22 +83,19 @@ Analysis:"""
|
|
44 |
print(f"Error parsing output: {e}")
|
45 |
return None
|
46 |
|
47 |
-
|
48 |
def process_input(input_text: str, llm):
|
49 |
general_task = load_text("tasks/general_task.txt")
|
50 |
|
51 |
tasks = [
|
52 |
-
("attachments", "tasks/Attachments_task.txt",
|
53 |
-
|
54 |
-
("
|
55 |
-
("personalities", "tasks/Personalities_task.txt", "knowledge/personalities_definitions.txt", personality_parser)
|
56 |
]
|
57 |
|
58 |
results = {}
|
59 |
|
60 |
-
for task_name, task_file,
|
61 |
specific_task = load_text(task_file)
|
62 |
-
|
63 |
-
results[task_name] = process_task(llm, input_text, general_task, specific_task, knowledge, parser)
|
64 |
|
65 |
-
return results
|
|
|
|
|
# processing.py — module-level setup: embeddings, FAISS knowledge index, LLM, retrieval chain.

from langchain.schema import HumanMessage
from output_parser import attachment_parser, bigfive_parser, personality_parser
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
# FIX: RetrievalQA lives in langchain.chains, not langchain.retrievers —
# the old import raised ImportError at startup (matching the Space's "Runtime error").
from langchain.chains import RetrievalQA
from llm_loader import load_model  # project helper that constructs the chat model
from config import openai_api_key  # API key kept out of source control
import os

# Embedding model used to vectorize the knowledge base.
# NOTE(review): presumably picks up the OpenAI key from env/config — confirm.
embedding_model = OpenAIEmbeddings()

# One knowledge file per analysis task.
knowledge_files = {
    "attachments": "knowledge/bartholomew_attachments_definitions.txt",
    "bigfive": "knowledge/bigfive_definitions.txt",
    "personalities": "knowledge/personalities_definitions.txt"
}

# Load each knowledge file's text; each file becomes one document in the index.
documents = []
for key, file_path in knowledge_files.items():
    with open(file_path, 'r', encoding='utf-8') as file:
        documents.append(file.read().strip())

# Create a FAISS index from the knowledge documents.
faiss_index = FAISS.from_texts(documents, embedding_model)

# Save FAISS index locally (optional, in case you want to persist it).
# To reload later: faiss_index = FAISS.load_local("faiss_index", embedding_model)
faiss_index.save_local("faiss_index")

# Load the LLM using llm_loader.py.
llm = load_model(openai_api_key)

# FIX: RetrievalQA cannot be instantiated directly with (llm=..., retriever=...) —
# the chain expects a combine-documents chain, and direct construction fails
# validation at import time. Use the documented factory instead.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=faiss_index.as_retriever())
|
41 |
|
42 |
def load_text(file_path: str) -> str:
|
43 |
with open(file_path, 'r', encoding='utf-8') as file:
|
44 |
return file.read().strip()
|
45 |
|
|
|
46 |
def truncate_text(text: str, max_tokens: int = 10000) -> str:
|
47 |
words = text.split()
|
48 |
if len(words) > max_tokens:
|
|
|
52 |
print(f"Text not truncated, contains {len(words)} words")
|
53 |
return text
|
54 |
|
55 |
+
def process_task(llm, input_text: str, general_task: str, specific_task: str, output_parser, qa_chain):
|
|
|
56 |
truncated_input = truncate_text(input_text)
|
57 |
|
58 |
+
# Perform retrieval to get the most relevant context
|
59 |
+
relevant_docs = qa_chain({"query": truncated_input})
|
60 |
+
retrieved_knowledge = "\n".join([doc.page_content for doc in relevant_docs['documents']])
|
61 |
+
|
62 |
+
# Combine the retrieved knowledge with the original prompt
|
63 |
prompt = f"""{general_task}
|
64 |
|
65 |
{specific_task}
|
66 |
|
67 |
+
Retrieved Knowledge: {retrieved_knowledge}
|
68 |
|
69 |
Input: {truncated_input}
|
70 |
|
|
|
83 |
print(f"Error parsing output: {e}")
|
84 |
return None
|
85 |
|
|
|
86 |
def process_input(input_text: str, llm):
    """Run every analysis task over *input_text* and collect the parsed results.

    Loads the shared general-task prompt, then executes the attachments,
    big-five, and personalities tasks via process_task (each with its own
    task file and output parser, sharing the module-level qa_chain).

    Returns a dict mapping task name -> parsed result (process_task may
    yield None for a task whose output could not be parsed).
    """
    general_task = load_text("tasks/general_task.txt")

    # (name, task prompt file, output parser) — one entry per analysis.
    task_specs = (
        ("attachments", "tasks/Attachments_task.txt", attachment_parser),
        ("bigfive", "tasks/BigFive_task.txt", bigfive_parser),
        ("personalities", "tasks/Personalities_task.txt", personality_parser),
    )

    results = {}
    for name, task_path, parser in task_specs:
        results[name] = process_task(
            llm, input_text, general_task, load_text(task_path), parser, qa_chain
        )
    return results
|