Spaces:
Sleeping
Sleeping
Stephen Zweibel
committed on
Commit
·
ec0096e
1
Parent(s):
4435587
Update app for Hugging Face
Browse files
- rule_extractor.py +25 -10
rule_extractor.py
CHANGED
@@ -202,21 +202,36 @@ def get_rules_from_url(url: str) -> str:
|
|
202 |
return f"Failed to extract rules from {url}. Both browser and HTTP extraction failed. Error: {str(e)}"
|
203 |
|
204 |
if result.success and result.extracted_content:
|
205 |
-
#
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
else:
|
211 |
-
|
212 |
-
return
|
213 |
-
|
214 |
# Store the raw data for debugging
|
215 |
-
logger.info(f"
|
216 |
|
217 |
# Format the rules for display
|
218 |
formatted_rules = format_rules_for_display(rules_data)
|
219 |
-
|
|
|
|
|
|
|
220 |
return formatted_rules
|
221 |
elif result.success and result.markdown:
|
222 |
# Fallback to markdown if structured extraction fails
|
|
|
202 |
return f"Failed to extract rules from {url}. Both browser and HTTP extraction failed. Error: {str(e)}"
|
203 |
|
204 |
if result.success and result.extracted_content:
|
205 |
+
# The extracted content is often a list containing a JSON string.
|
206 |
+
raw_data = result.extracted_content
|
207 |
+
if isinstance(raw_data, list) and len(raw_data) > 0:
|
208 |
+
raw_data = raw_data[0]
|
209 |
+
|
210 |
+
# Ensure we have a dictionary to work with
|
211 |
+
if isinstance(raw_data, str):
|
212 |
+
try:
|
213 |
+
rules_data = json.loads(raw_data)
|
214 |
+
# If the parsed data is a list, take the first element
|
215 |
+
if isinstance(rules_data, list) and len(rules_data) > 0:
|
216 |
+
rules_data = rules_data[0]
|
217 |
+
except json.JSONDecodeError:
|
218 |
+
logger.error(f"Failed to parse JSON from extracted content: {raw_data}")
|
219 |
+
return "Failed to parse the extracted formatting rules."
|
220 |
+
elif isinstance(raw_data, dict):
|
221 |
+
rules_data = raw_data
|
222 |
else:
|
223 |
+
logger.warning(f"Unexpected type for extracted content: {type(raw_data)}")
|
224 |
+
return "Could not process the extracted formatting rules."
|
225 |
+
|
226 |
# Store the raw data for debugging
|
227 |
+
logger.info(f"Parsed rules data: {json.dumps(rules_data, indent=2)}")
|
228 |
|
229 |
# Format the rules for display
|
230 |
formatted_rules = format_rules_for_display(rules_data)
|
231 |
+
if not formatted_rules:
|
232 |
+
return "Failed to format the extracted rules."
|
233 |
+
|
234 |
+
logger.info(f"Formatted rules: {formatted_rules[:100]}...")
|
235 |
return formatted_rules
|
236 |
elif result.success and result.markdown:
|
237 |
# Fallback to markdown if structured extraction fails
|