reab5555 committed on
Commit
f33ef48
·
verified ·
1 Parent(s): 4064022

Update processing.py

Browse files
Files changed (1) hide show
  1. processing.py +49 -8
processing.py CHANGED
@@ -7,6 +7,7 @@ from config import openai_api_key
7
  from langchain.chains import RetrievalQA
8
  import os
9
  import json
 
10
 
11
  # Initialize embeddings and FAISS index
12
  embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
@@ -46,6 +47,13 @@ def truncate_text(text: str, max_tokens: int = 10000) -> str:
46
  print(f"Text not truncated, contains {len(words)} words")
47
  return text
48
 
 
 
 
 
 
 
 
49
  def process_input(input_text: str, llm):
50
  general_task = load_text("tasks/general_task.txt")
51
  attachments_task = load_text("tasks/Attachments_task.txt")
@@ -84,30 +92,34 @@ Please provide a comprehensive analysis for each speaker, including:
84
  2. Big Five traits (use the format from the Big Five Traits Task)
85
  3. Personality disorders (use the format from the Personality Disorders Task)
86
 
87
- Respond with a JSON object containing an array of speaker analyses. Each speaker analysis should include all three aspects mentioned above.
88
 
89
  Analysis:"""
90
 
91
  messages = [HumanMessage(content=prompt)]
92
  response = llm.invoke(messages)
93
 
94
- # Print the raw LLM model output
95
  print("Raw LLM Model Output:")
96
  print(response.content)
97
  print("\n" + "-"*50 + "\n") # Separator for readability
98
 
99
  try:
100
- # Parse the response as JSON
101
- parsed_json = json.loads(response.content)
 
 
 
 
 
102
 
103
- # Print the parsed JSON before further processing
104
  print("Parsed JSON Output:")
105
  print(json.dumps(parsed_json, indent=2))
106
  print("\n" + "-"*50 + "\n") # Separator for readability
107
 
108
  # Process the parsed JSON
109
  results = {}
110
- for speaker_analysis in parsed_json:
 
111
  speaker_id = speaker_analysis.get('speaker', 'Unknown Speaker')
112
  results[speaker_id] = {
113
  'attachments': attachment_parser.parse_object(speaker_analysis.get('attachment_styles', {})),
@@ -115,7 +127,36 @@ Analysis:"""
115
  'personalities': personality_parser.parse_object(speaker_analysis.get('personality_disorders', {}))
116
  }
117
 
 
 
 
 
 
 
 
 
118
  return results
 
 
 
 
 
 
 
 
 
119
  except Exception as e:
120
- print(f"Error parsing output: {e}")
121
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from langchain.chains import RetrievalQA
8
  import os
9
  import json
10
+ import re
11
 
12
  # Initialize embeddings and FAISS index
13
  embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
 
47
  print(f"Text not truncated, contains {len(words)} words")
48
  return text
49
 
50
def extract_json_from_text(text):
    """Return the first valid JSON object embedded in *text*.

    LLM responses often wrap the JSON payload in prose or code fences, so
    the object has to be located inside free-form text before it can be
    parsed.

    Args:
        text: Raw text that may contain a JSON object somewhere inside it.

    Returns:
        The substring of *text* holding the first syntactically valid JSON
        object (from its opening ``{`` to its matching ``}``), or ``None``
        when *text* is empty/``None`` or contains no valid JSON object.
    """
    if not text:
        return None

    # The previous implementation relied on the recursive regex construct
    # ``(?R)``, which the standard-library ``re`` module rejects with
    # ``re.error``.  Letting the JSON decoder find the matching brace avoids
    # that and also copes with braces that appear inside string values.
    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            _, end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            # Not a valid object at this brace; try the next candidate.
            start = text.find('{', start + 1)
            continue
        return text[start:end]
    return None
56
+
57
  def process_input(input_text: str, llm):
58
  general_task = load_text("tasks/general_task.txt")
59
  attachments_task = load_text("tasks/Attachments_task.txt")
 
92
  2. Big Five traits (use the format from the Big Five Traits Task)
93
  3. Personality disorders (use the format from the Personality Disorders Task)
94
 
95
+ Respond with a JSON object containing an array of speaker analyses under the key 'speaker_analyses'. Each speaker analysis should include all three aspects mentioned above.
96
 
97
  Analysis:"""
98
 
99
  messages = [HumanMessage(content=prompt)]
100
  response = llm.invoke(messages)
101
 
 
102
  print("Raw LLM Model Output:")
103
  print(response.content)
104
  print("\n" + "-"*50 + "\n") # Separator for readability
105
 
106
  try:
107
+ # Extract JSON from the response
108
+ json_str = extract_json_from_text(response.content)
109
+ if not json_str:
110
+ raise ValueError("No valid JSON structure found in the response")
111
+
112
+ # Parse the JSON
113
+ parsed_json = json.loads(json_str)
114
 
 
115
  print("Parsed JSON Output:")
116
  print(json.dumps(parsed_json, indent=2))
117
  print("\n" + "-"*50 + "\n") # Separator for readability
118
 
119
  # Process the parsed JSON
120
  results = {}
121
+ speaker_analyses = parsed_json.get('speaker_analyses', [])
122
+ for speaker_analysis in speaker_analyses:
123
  speaker_id = speaker_analysis.get('speaker', 'Unknown Speaker')
124
  results[speaker_id] = {
125
  'attachments': attachment_parser.parse_object(speaker_analysis.get('attachment_styles', {})),
 
127
  'personalities': personality_parser.parse_object(speaker_analysis.get('personality_disorders', {}))
128
  }
129
 
130
+ if not results:
131
+ print("Warning: No speaker analyses found in the parsed JSON.")
132
+ return {"Unknown Speaker": {
133
+ 'attachments': attachment_parser.parse_object({}),
134
+ 'bigfive': bigfive_parser.parse_object({}),
135
+ 'personalities': personality_parser.parse_object({})
136
+ }}
137
+
138
  return results
139
+ except json.JSONDecodeError as e:
140
+ print(f"Error parsing JSON: {e}")
141
+ print("Raw content causing the error:")
142
+ print(response.content)
143
+ return {"Unknown Speaker": {
144
+ 'attachments': attachment_parser.parse_object({}),
145
+ 'bigfive': bigfive_parser.parse_object({}),
146
+ 'personalities': personality_parser.parse_object({})
147
+ }}
148
  except Exception as e:
149
+ print(f"Unexpected error: {e}")
150
+ return {"Unknown Speaker": {
151
+ 'attachments': attachment_parser.parse_object({}),
152
+ 'bigfive': bigfive_parser.parse_object({}),
153
+ 'personalities': personality_parser.parse_object({})
154
+ }}
155
+
156
# Manual smoke test: runs only when this module is executed as a script.
if __name__ == "__main__":
    # NOTE(review): `llm` is not defined anywhere in the visible part of this
    # module - confirm it is created or imported before this line runs.
    sample_text = "This is a test input for processing."
    analysis = process_input(sample_text, llm)
    print("\nProcessing completed.")
    print("Final results:")
    rendered = json.dumps(analysis, indent=2)
    print(rendered)