Spaces:
Runtime error
Runtime error
Update processing.py
Browse files- processing.py +49 -8
processing.py
CHANGED
@@ -7,6 +7,7 @@ from config import openai_api_key
|
|
7 |
from langchain.chains import RetrievalQA
|
8 |
import os
|
9 |
import json
|
|
|
10 |
|
11 |
# Initialize embeddings and FAISS index
|
12 |
embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
|
@@ -46,6 +47,13 @@ def truncate_text(text: str, max_tokens: int = 10000) -> str:
|
|
46 |
print(f"Text not truncated, contains {len(words)} words")
|
47 |
return text
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
def process_input(input_text: str, llm):
|
50 |
general_task = load_text("tasks/general_task.txt")
|
51 |
attachments_task = load_text("tasks/Attachments_task.txt")
|
@@ -84,30 +92,34 @@ Please provide a comprehensive analysis for each speaker, including:
|
|
84 |
2. Big Five traits (use the format from the Big Five Traits Task)
|
85 |
3. Personality disorders (use the format from the Personality Disorders Task)
|
86 |
|
87 |
-
Respond with a JSON object containing an array of speaker analyses. Each speaker analysis should include all three aspects mentioned above.
|
88 |
|
89 |
Analysis:"""
|
90 |
|
91 |
messages = [HumanMessage(content=prompt)]
|
92 |
response = llm.invoke(messages)
|
93 |
|
94 |
-
# Print the raw LLM model output
|
95 |
print("Raw LLM Model Output:")
|
96 |
print(response.content)
|
97 |
print("\n" + "-"*50 + "\n") # Separator for readability
|
98 |
|
99 |
try:
|
100 |
-
#
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
-
# Print the parsed JSON before further processing
|
104 |
print("Parsed JSON Output:")
|
105 |
print(json.dumps(parsed_json, indent=2))
|
106 |
print("\n" + "-"*50 + "\n") # Separator for readability
|
107 |
|
108 |
# Process the parsed JSON
|
109 |
results = {}
|
110 |
-
|
|
|
111 |
speaker_id = speaker_analysis.get('speaker', 'Unknown Speaker')
|
112 |
results[speaker_id] = {
|
113 |
'attachments': attachment_parser.parse_object(speaker_analysis.get('attachment_styles', {})),
|
@@ -115,7 +127,36 @@ Analysis:"""
|
|
115 |
'personalities': personality_parser.parse_object(speaker_analysis.get('personality_disorders', {}))
|
116 |
}
|
117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
except Exception as e:
|
120 |
-
print(f"
|
121 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
from langchain.chains import RetrievalQA
|
8 |
import os
|
9 |
import json
|
10 |
+
import re
|
11 |
|
12 |
# Initialize embeddings and FAISS index
|
13 |
embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
|
|
|
47 |
print(f"Text not truncated, contains {len(words)} words")
|
48 |
return text
|
49 |
|
50 |
+
def extract_json_from_text(text):
    """Return the first JSON object embedded in ``text``, or ``None``.

    LLM replies often wrap the JSON payload in prose or Markdown fences.
    This scans ``text`` for each ``{`` and asks the standard JSON decoder
    to parse an object starting there; the first position that parses
    successfully wins.

    The previous implementation used the recursive regex construct
    ``(?R)``, which Python's built-in ``re`` module does not support, so
    every call raised ``re.error``. Using the decoder also handles nested
    objects and braces inside string values correctly.

    Args:
        text: Raw text that may contain a JSON object.

    Returns:
        The exact substring of ``text`` that forms the first parseable
        JSON object, or ``None`` if there is none (including when
        ``text`` is empty or not a string).
    """
    if not isinstance(text, str) or not text:
        return None

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses one JSON value beginning at `start` and
            # reports where it ended, ignoring any trailing text.
            _, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not a valid object here; try the next opening brace.
            start = text.find("{", start + 1)
            continue
        return text[start:end]
    return None
|
56 |
+
|
57 |
def process_input(input_text: str, llm):
|
58 |
general_task = load_text("tasks/general_task.txt")
|
59 |
attachments_task = load_text("tasks/Attachments_task.txt")
|
|
|
92 |
2. Big Five traits (use the format from the Big Five Traits Task)
|
93 |
3. Personality disorders (use the format from the Personality Disorders Task)
|
94 |
|
95 |
+
Respond with a JSON object containing an array of speaker analyses under the key 'speaker_analyses'. Each speaker analysis should include all three aspects mentioned above.
|
96 |
|
97 |
Analysis:"""
|
98 |
|
99 |
messages = [HumanMessage(content=prompt)]
|
100 |
response = llm.invoke(messages)
|
101 |
|
|
|
102 |
print("Raw LLM Model Output:")
|
103 |
print(response.content)
|
104 |
print("\n" + "-"*50 + "\n") # Separator for readability
|
105 |
|
106 |
try:
|
107 |
+
# Extract JSON from the response
|
108 |
+
json_str = extract_json_from_text(response.content)
|
109 |
+
if not json_str:
|
110 |
+
raise ValueError("No valid JSON structure found in the response")
|
111 |
+
|
112 |
+
# Parse the JSON
|
113 |
+
parsed_json = json.loads(json_str)
|
114 |
|
|
|
115 |
print("Parsed JSON Output:")
|
116 |
print(json.dumps(parsed_json, indent=2))
|
117 |
print("\n" + "-"*50 + "\n") # Separator for readability
|
118 |
|
119 |
# Process the parsed JSON
|
120 |
results = {}
|
121 |
+
speaker_analyses = parsed_json.get('speaker_analyses', [])
|
122 |
+
for speaker_analysis in speaker_analyses:
|
123 |
speaker_id = speaker_analysis.get('speaker', 'Unknown Speaker')
|
124 |
results[speaker_id] = {
|
125 |
'attachments': attachment_parser.parse_object(speaker_analysis.get('attachment_styles', {})),
|
|
|
127 |
'personalities': personality_parser.parse_object(speaker_analysis.get('personality_disorders', {}))
|
128 |
}
|
129 |
|
130 |
+
if not results:
|
131 |
+
print("Warning: No speaker analyses found in the parsed JSON.")
|
132 |
+
return {"Unknown Speaker": {
|
133 |
+
'attachments': attachment_parser.parse_object({}),
|
134 |
+
'bigfive': bigfive_parser.parse_object({}),
|
135 |
+
'personalities': personality_parser.parse_object({})
|
136 |
+
}}
|
137 |
+
|
138 |
return results
|
139 |
+
except json.JSONDecodeError as e:
|
140 |
+
print(f"Error parsing JSON: {e}")
|
141 |
+
print("Raw content causing the error:")
|
142 |
+
print(response.content)
|
143 |
+
return {"Unknown Speaker": {
|
144 |
+
'attachments': attachment_parser.parse_object({}),
|
145 |
+
'bigfive': bigfive_parser.parse_object({}),
|
146 |
+
'personalities': personality_parser.parse_object({})
|
147 |
+
}}
|
148 |
except Exception as e:
|
149 |
+
print(f"Unexpected error: {e}")
|
150 |
+
return {"Unknown Speaker": {
|
151 |
+
'attachments': attachment_parser.parse_object({}),
|
152 |
+
'bigfive': bigfive_parser.parse_object({}),
|
153 |
+
'personalities': personality_parser.parse_object({})
|
154 |
+
}}
|
155 |
+
|
156 |
+
# Manual smoke test: executed only when this module is run as a script.
# NOTE(review): `llm` is not defined in the visible part of this file --
# confirm it is created at module level, otherwise this raises NameError.
if __name__ == "__main__":
    test_input = "This is a test input for processing."
    result = process_input(test_input, llm)
    # Same two lines of output as before, emitted with a single call.
    print("\nProcessing completed.", "Final results:", sep="\n")
    print(json.dumps(result, indent=2))
|