Spaces:
Runtime error
Runtime error
Update helpers.py
Browse files- helpers.py +35 -13
helpers.py
CHANGED
@@ -131,20 +131,42 @@ key = os.getenv("GOOGLE_API_KEY")
|
|
131 |
# return DoclingLoader(file_path=file_path, chunker=chunker # This will break your doc into manageable pieces.
|
132 |
# ).load()
|
133 |
|
134 |
-
def extract_metadata(input_string):
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
try:
|
140 |
-
#
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
|
|
|
|
148 |
|
149 |
# # Example usage:
|
150 |
# input_str = "Some random text before and then {'a': 'abc', 'b': 'bcd'} and some random text after."
|
|
|
131 |
# return DoclingLoader(file_path=file_path, chunker=chunker # This will break your doc into manageable pieces.
|
132 |
# ).load()
|
133 |
|
134 |
+
# def extract_metadata(input_string):
|
135 |
+
# # Use regex to find the content inside curly braces
|
136 |
+
# match = re.search(r'\{.*?\}', input_string)
|
137 |
+
# if match:
|
138 |
+
# metadata_str = match.group() # This returns the substring with the braces
|
139 |
+
# try:
|
140 |
+
# # Safely evaluate the string to a dictionary
|
141 |
+
# new_metadata = ast.literal_eval(metadata_str)
|
142 |
+
# except Exception as e:
|
143 |
+
# print(f"Error evaluating metadata: {e}")
|
144 |
+
# new_metadata = {}
|
145 |
+
# else:
|
146 |
+
# new_metadata = None
|
147 |
+
# return new_metadata
|
148 |
+
|
149 |
+
def _balanced_brace_end(text, start):
    """Return the index just past the ``}`` that closes ``text[start]``.

    ``text[start]`` must be ``{``. Braces that appear inside single- or
    double-quoted string literals are ignored, and backslash escapes inside
    those literals are honoured. Returns ``-1`` when no balanced closing
    brace exists.
    """
    depth = 0
    quote = None  # Quote character of the string literal we are inside, if any.
    index = start
    length = len(text)
    while index < length:
        char = text[index]
        if quote is not None:
            if char == "\\":
                index += 1  # Skip the escaped character.
            elif char == quote:
                quote = None
        elif char in "\"'":
            quote = char
        elif char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
            if depth == 0:
                return index + 1
        index += 1
    return -1


def extract_metadata(response_string):
    """Return the smallest dict literal embedded in *response_string*.

    The text is scanned for brace-delimited spans. Each span is parsed with
    ``ast.literal_eval`` (never ``eval``), and among the spans that evaluate
    to a ``dict`` the one whose ``str()`` form is shortest is returned. On a
    tie the earliest one wins.

    Unlike a non-greedy ``\\{.*?\\}`` regex, the scan is brace-balanced and
    quote-aware, so nested dicts (``{'a': {'b': 1}}``) and values containing
    ``}`` (``{'a': 'x}y'}``) are parsed as a whole instead of being cut at
    the first closing brace. A dict nested inside a successfully parsed dict
    is treated as part of its parent, not as a separate candidate.

    Args:
        response_string: Text that may contain one or more Python dict
            literals, possibly spanning several lines.

    Returns:
        The parsed ``dict`` with the shortest string representation, or
        ``None`` when the input is empty, is not a string, or contains no
        valid dict literal.
    """
    if not isinstance(response_string, str) or not response_string:
        return None

    smallest_dict = None
    min_length = float("inf")

    position = response_string.find("{")
    while position != -1:
        resume_at = position + 1  # Default: retry just inside this brace.
        end = _balanced_brace_end(response_string, position)
        if end != -1:
            try:
                parsed = ast.literal_eval(response_string[position:end])
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                # Not a valid literal; a valid dict may still start inside it.
                parsed = None
            if isinstance(parsed, dict):
                dict_length = len(str(parsed))  # Size measured on the string form.
                if dict_length < min_length:
                    smallest_dict = parsed
                    min_length = dict_length
                resume_at = end  # Skip past the dict we just consumed.
        position = response_string.find("{", resume_at)

    return smallest_dict
|
170 |
|
171 |
# # Example usage:
|
172 |
# input_str = "Some random text before and then {'a': 'abc', 'b': 'bcd'} and some random text after."
|