quoc-khanh committed on
Commit
49cf9eb
·
verified ·
1 Parent(s): ddbf564

Update helpers.py

Browse files
Files changed (1) hide show
  1. helpers.py +35 -13
helpers.py CHANGED
@@ -131,20 +131,42 @@ key = os.getenv("GOOGLE_API_KEY")
131
  # return DoclingLoader(file_path=file_path, chunker=chunker # This will break your doc into manageable pieces.
132
  # ).load()
133
 
134
- def extract_metadata(input_string):
135
- # Use regex to find the content inside curly braces
136
- match = re.search(r'\{.*?\}', input_string)
137
- if match:
138
- metadata_str = match.group() # This returns the substring with the braces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  try:
140
- # Safely evaluate the string to a dictionary
141
- new_metadata = ast.literal_eval(metadata_str)
142
- except Exception as e:
143
- print(f"Error evaluating metadata: {e}")
144
- new_metadata = {}
145
- else:
146
- new_metadata = None
147
- return new_metadata
 
 
148
 
149
  # # Example usage:
150
  # input_str = "Some random text before and then {'a': 'abc', 'b': 'bcd'} and some random text after."
 
131
  # return DoclingLoader(file_path=file_path, chunker=chunker # This will break your doc into manageable pieces.
132
  # ).load()
133
 
134
+ # def extract_metadata(input_string):
135
+ # # Use regex to find the content inside curly braces
136
+ # match = re.search(r'\{.*?\}', input_string)
137
+ # if match:
138
+ # metadata_str = match.group() # This returns the substring with the braces
139
+ # try:
140
+ # # Safely evaluate the string to a dictionary
141
+ # new_metadata = ast.literal_eval(metadata_str)
142
+ # except Exception as e:
143
+ # print(f"Error evaluating metadata: {e}")
144
+ # new_metadata = {}
145
+ # else:
146
+ # new_metadata = None
147
+ # return new_metadata
148
+
149
def _find_closing_brace(text, start):
    """Return the index of the ``}`` that closes the ``{`` at ``text[start]``.

    Braces that appear inside single- or double-quoted strings are ignored,
    and backslash escapes inside those strings are honoured, so a value such
    as ``'uses } inside'`` does not end the span early.

    Returns:
        The index of the matching closing brace, or -1 if the opening brace
        is never closed.
    """
    depth = 0
    quote = None  # quote character of the string currently being skipped
    escaped = False  # True right after a backslash inside a string
    for index in range(start, len(text)):
        char = text[index]
        if quote is not None:
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == quote:
                quote = None
        elif char in "'\"":
            quote = char
        elif char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return index
    return -1


def _parse_dict_literal(candidate):
    """Return ``candidate`` evaluated as a dict literal, or None if it is not one."""
    try:
        value = ast.literal_eval(candidate)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # These are the errors ast.literal_eval raises for malformed input.
        return None
    return value if isinstance(value, dict) else None


def extract_metadata(response_string):
    """Extract the smallest dictionary literal embedded in a string.

    The string is scanned for brace-delimited spans with balanced braces.
    Each span is parsed with ``ast.literal_eval`` (no code is executed), and
    spans that are not valid dict literals are skipped.  Nested dictionaries
    are kept as part of their enclosing dictionary instead of being cut at
    the first ``}``.

    Args:
        response_string: Text that may contain one or more Python dict
            literals, e.g. ``"text {'a': 'abc'} more text"``.

    Returns:
        The parsed dict whose ``str()`` representation is shortest (the
        first one wins on ties), or None if the input is empty or contains
        no valid dict literal.
    """
    if not response_string:
        return None

    smallest_dict = None
    min_length = float("inf")
    position = 0

    while True:
        start = response_string.find("{", position)
        # Stop when there is no opening brace left, or nothing could close it.
        if start == -1 or response_string.find("}", start) == -1:
            break

        end = _find_closing_brace(response_string, start)
        parsed = None
        if end != -1:
            parsed = _parse_dict_literal(response_string[start:end + 1])

        if parsed is None:
            # Not a dict literal: retry from the next character so a stray
            # "{" cannot hide a valid dictionary that follows it.
            position = start + 1
            continue

        # Resume after this dictionary; its nested dicts are part of it and
        # are not considered as separate candidates.
        position = end + 1

        dict_length = len(str(parsed))  # size measured on the dict's repr
        if dict_length < min_length:
            smallest_dict = parsed
            min_length = dict_length

    return smallest_dict
170
 
171
  # # Example usage:
172
  # input_str = "Some random text before and then {'a': 'abc', 'b': 'bcd'} and some random text after."