Upload folder using huggingface_hub
rule_extractor.py  CHANGED  (+39 -39)
@@ -200,46 +200,46 @@ def get_rules_from_url(url: str) -> str:
            except Exception as fallback_e:
                logger.error(f"Fallback HTTP request also failed: {fallback_e}")
                return f"Failed to extract rules from {url}. Both browser and HTTP extraction failed. Error: {str(e)}"
-            else:
-                logger.warning(f"Unexpected type for extracted content: {type(raw_data)}")
-                return "Could not process the extracted formatting rules."
-
-            # Store the raw data for debugging
-            logger.info(f"Parsed rules data: {json.dumps(rules_data, indent=2)}")
-
-            # Format the rules for display
-            formatted_rules = format_rules_for_display(rules_data)
-            if not formatted_rules:
-                return "Failed to format the extracted rules."
-
-            logger.info(f"Formatted rules: {formatted_rules[:100]}...")
-            return formatted_rules
-        elif result.success and result.markdown:
-            # Fallback to markdown if structured extraction fails
-            logger.info(f"Extraction failed, falling back to markdown for {url}")
-            return result.markdown
-        else:
-            logger.warning(f"
-            return "Could not
+
+        if result.success and result.extracted_content:
+            # The extracted content is often a list containing a JSON string.
+            raw_data = result.extracted_content
+            if isinstance(raw_data, list) and len(raw_data) > 0:
+                raw_data = raw_data[0]
+
+            # Ensure we have a dictionary to work with
+            if isinstance(raw_data, str):
+                try:
+                    rules_data = json.loads(raw_data)
+                    # If the parsed data is a list, take the first element
+                    if isinstance(rules_data, list) and len(rules_data) > 0:
+                        rules_data = rules_data[0]
+                except json.JSONDecodeError:
+                    logger.error(f"Failed to parse JSON from extracted content: {raw_data}")
+                    return "Failed to parse the extracted formatting rules."
+            elif isinstance(raw_data, dict):
+                rules_data = raw_data
+            else:
+                logger.warning(f"Unexpected type for extracted content: {type(raw_data)}")
+                return "Could not process the extracted formatting rules."
+
+            # Store the raw data for debugging
+            logger.info(f"Parsed rules data: {json.dumps(rules_data, indent=2)}")
+
+            # Format the rules for display
+            formatted_rules = format_rules_for_display(rules_data)
+            if not formatted_rules:
+                return "Failed to format the extracted rules."
+
+            logger.info(f"Formatted rules: {formatted_rules[:100]}...")
+            return formatted_rules
+        elif result.success and result.markdown:
+            # Fallback to markdown if structured extraction fails
+            logger.info(f"Extraction failed, falling back to markdown for {url}")
+            return result.markdown
+        else:
+            logger.warning(f"Failed to extract rules or markdown for {url}. Crawler success: {result.success}")
+            return "Could not extract formatting rules from the provided URL. The crawler did not return any content."

    # Run the async function using the patched event loop
    return asyncio.run(_extract_rules_async(url))
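
The added block normalizes three shapes of result.extracted_content (a list wrapping a JSON string, a bare JSON string, or an already-parsed dict) before formatting. A minimal, crawler-free sketch of the same shape-handling follows; normalize_extracted and the sample payloads are illustrative, not part of the Space's code:

import json

def normalize_extracted(raw):
    # Unwrap a single-element list such as ['{"rules": [...]}'].
    if isinstance(raw, list) and len(raw) > 0:
        raw = raw[0]
    # A JSON string is parsed; a top-level JSON array is unwrapped again.
    if isinstance(raw, str):
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            return None  # the Space's code logs and returns an error string here
        if isinstance(data, list) and len(data) > 0:
            data = data[0]
        return data
    if isinstance(raw, dict):
        return raw
    return None  # unexpected type; the Space's code logs a warning here

# Hypothetical payloads covering the three accepted shapes:
assert normalize_extracted(['{"rules": ["use bullets"]}']) == {"rules": ["use bullets"]}
assert normalize_extracted('[{"rules": []}]') == {"rules": []}
assert normalize_extracted({"rules": []}) == {"rules": []}
assert normalize_extracted("not json") is None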
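Lines 244-245 keep get_rules_from_url synchronous by driving the async extraction with asyncio.run. The "patched event loop" the comment mentions is outside this hunk; a common choice in Gradio- or notebook-hosted Spaces is nest_asyncio, sketched below under that assumption (_demo_async and get_rules are placeholder names):

import asyncio

import nest_asyncio  # assumption: the patch the comment refers to; not shown in this hunk

nest_asyncio.apply()  # lets asyncio.run() work even when a loop is already running (e.g. inside Gradio)

async def _demo_async(url: str) -> str:
    await asyncio.sleep(0)  # stand-in for the real browser crawl
    return f"rules for {url}"

def get_rules(url: str) -> str:
    # Same sync-to-async bridge as lines 244-245 above.
    return asyncio.run(_demo_async(url))

print(get_rules("https://example.com/style-guide"))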