Spaces:

Zwounds
/

FormatReview

Sleeping

App Files Community

Stephen Zweibel committed on Jun 29

Commit

ec0096e

1 Parent(s): 4435587

Update app for Hugging Face

Browse files

Files changed (1) hide show

rule_extractor.py +25 -10

rule_extractor.py CHANGED Viewed

@@ -202,21 +202,36 @@ def get_rules_from_url(url: str) -> str:
                 return f"Failed to extract rules from {url}. Both browser and HTTP extraction failed. Error: {str(e)}"
             if result.success and result.extracted_content:
-                # Format the extracted data into a readable string
-                if isinstance(result.extracted_content, list) and len(result.extracted_content) > 0:
-                    rules_data = result.extracted_content[0]
-                elif isinstance(result.extracted_content, dict):
-                    rules_data = result.extracted_content
                 else:
-                    # If it's a string or other type, use markdown as fallback
-                    return str(result.extracted_content) if result.extracted_content else result.markdown if result.markdown else "Could not extract formatting rules from the provided URL."
                 # Store the raw data for debugging
-                logger.info(f"Extracted rules data: {json.dumps(rules_data, indent=2)}")
                 # Format the rules for display
                 formatted_rules = format_rules_for_display(rules_data)
-                logger.info(f"Formatted rules: {formatted_rules[:100]}...")  # Log for debugging
                 return formatted_rules
             elif result.success and result.markdown:
                 # Fallback to markdown if structured extraction fails

                 return f"Failed to extract rules from {url}. Both browser and HTTP extraction failed. Error: {str(e)}"
             if result.success and result.extracted_content:
+                # The extracted content is often a list containing a JSON string.
+                raw_data = result.extracted_content
+                if isinstance(raw_data, list) and len(raw_data) > 0:
+                    raw_data = raw_data[0]
+                # Ensure we have a dictionary to work with
+                if isinstance(raw_data, str):
+                    try:
+                        rules_data = json.loads(raw_data)
+                        # If the parsed data is a list, take the first element
+                        if isinstance(rules_data, list) and len(rules_data) > 0:
+                            rules_data = rules_data[0]
+                    except json.JSONDecodeError:
+                        logger.error(f"Failed to parse JSON from extracted content: {raw_data}")
+                        return "Failed to parse the extracted formatting rules."
+                elif isinstance(raw_data, dict):
+                    rules_data = raw_data
                 else:
+                    logger.warning(f"Unexpected type for extracted content: {type(raw_data)}")
+                    return "Could not process the extracted formatting rules."
                 # Store the raw data for debugging
+                logger.info(f"Parsed rules data: {json.dumps(rules_data, indent=2)}")
                 # Format the rules for display
                 formatted_rules = format_rules_for_display(rules_data)
+                if not formatted_rules:
+                    return "Failed to format the extracted rules."
+                logger.info(f"Formatted rules: {formatted_rules[:100]}...")
                 return formatted_rules
             elif result.success and result.markdown:
                 # Fallback to markdown if structured extraction fails