# processing.py
from langchain.schema import HumanMessage
from output_parser import attachment_parser, bigfive_parser, personality_parser


def load_text(file_path: str) -> str:
    """Read a UTF-8 text file and return its contents with surrounding whitespace stripped."""
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read().strip()


def truncate_text(text: str, max_tokens: int = 10000) -> str:
    """Truncate the text to at most max_tokens whitespace-separated words (used as a rough token proxy)."""
    words = text.split()
    if len(words) > max_tokens:
        truncated_text = ' '.join(words[:max_tokens])
        print(f"Text truncated from {len(words)} to {max_tokens} words")
        return truncated_text
    print(f"Text not truncated, contains {len(words)} words")
    return text


def process_task(llm, input_text: str, general_task: str, specific_task: str, knowledge: str, output_parser):
    """Assemble the prompt from task, knowledge, and input, query the LLM, and parse its response."""
    truncated_input = truncate_text(input_text)
    prompt = f"""{general_task}
{specific_task}
Knowledge: {knowledge}
Input: {truncated_input}
{output_parser.get_format_instructions()}
Analysis:"""
    messages = [HumanMessage(content=prompt)]
    response = llm(messages)
    print(response)
    try:
        parsed_output = output_parser.parse(response.content)
        return parsed_output
    except Exception as e:
        print(f"Error parsing output: {e}")
        return None


def process_input(input_text: str, llm):
    """Run the attachment, Big Five, and personality analyses on the input text and return parsed results by task name."""
    general_task = load_text("tasks/general_task.txt")
    tasks = [
        ("attachments", "tasks/Attachments_task.txt", "knowledge/bartholomew_attachments_definitions.txt",
         attachment_parser),
        ("bigfive", "tasks/BigFive_task.txt", "knowledge/bigfive_definitions.txt", bigfive_parser),
        ("personalities", "tasks/Personalities_task.txt", "knowledge/personalities_definitions.txt",
         personality_parser)
    ]
    results = {}
    for task_name, task_file, knowledge_file, parser in tasks:
        specific_task = load_text(task_file)
        knowledge = load_text(knowledge_file)
        results[task_name] = process_task(llm, input_text, general_task, specific_task, knowledge, parser)
    return results
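

# --- Hedged usage sketch (not part of the original module) ---
# Shows how process_input might be invoked with a legacy LangChain chat model,
# matching the `from langchain.schema import HumanMessage` style used above.
# ChatOpenAI, the model name, and the input path are assumptions for illustration only.
if __name__ == "__main__":
    from langchain.chat_models import ChatOpenAI  # assumes langchain with OpenAI support installed

    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)  # hypothetical model choice
    sample_text = load_text("input/sample_transcript.txt")  # hypothetical input file
    for task_name, parsed in process_input(sample_text, llm).items():
        print(task_name, parsed)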