Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Refactor HTML extraction logic to streamline response handling and improve error messaging
Browse files
app.py
CHANGED
@@ -94,89 +94,18 @@ Focus on creating clean, spacious, and well-proportioned designs that feel premi
|
|
94 |
|
95 |
def extract_html_from_response(response_text):
|
96 |
"""Extract HTML code from model response"""
|
97 |
-
|
98 |
-
#
|
99 |
html_pattern = r"```(?:html)?\s*(.*?)```"
|
100 |
matches = re.findall(html_pattern, response_text, re.DOTALL | re.IGNORECASE)
|
101 |
-
|
102 |
if matches:
|
103 |
return matches[0].strip()
|
104 |
-
|
105 |
-
# If no code blocks
|
106 |
if re.search(r"<[^>]+>", response_text):
|
107 |
-
|
108 |
-
|
109 |
-
if not html_start:
|
110 |
-
raise ValueError("No HTML content found in the response")
|
111 |
-
|
112 |
-
# Start from the first HTML tag
|
113 |
-
content_from_first_tag = response_text[html_start.start() :]
|
114 |
-
|
115 |
-
# Split into lines and process
|
116 |
-
lines = content_from_first_tag.split("\n")
|
117 |
-
html_lines = []
|
118 |
-
|
119 |
-
for line in lines:
|
120 |
-
line_stripped = line.strip()
|
121 |
-
|
122 |
-
# Skip empty lines at the beginning
|
123 |
-
if not line_stripped and not html_lines:
|
124 |
-
continue
|
125 |
-
|
126 |
-
# Stop if we hit obvious explanation text after we have HTML
|
127 |
-
if html_lines and line_stripped:
|
128 |
-
# Check if this line looks like explanation rather than HTML
|
129 |
-
if not re.search(r"<[^>]*>", line_stripped):
|
130 |
-
# If it's a long explanatory sentence, stop here
|
131 |
-
if len(line_stripped) > 80 or any(
|
132 |
-
phrase in line_stripped.lower()
|
133 |
-
for phrase in [
|
134 |
-
"this creates",
|
135 |
-
"this provides",
|
136 |
-
"the form",
|
137 |
-
"this design",
|
138 |
-
"here's",
|
139 |
-
"this code",
|
140 |
-
"explanation:",
|
141 |
-
"note:",
|
142 |
-
"features:",
|
143 |
-
"improvements:",
|
144 |
-
]
|
145 |
-
):
|
146 |
-
break
|
147 |
-
|
148 |
-
html_lines.append(line.rstrip())
|
149 |
-
|
150 |
-
# If we have HTML and hit a line that ends with a period and looks like explanation, stop
|
151 |
-
if html_lines and line_stripped.endswith(".") and len(line_stripped) > 40:
|
152 |
-
if not re.search(r"<[^>]*>", line_stripped):
|
153 |
-
html_lines.pop() # Remove the explanatory line
|
154 |
-
break
|
155 |
-
|
156 |
-
# Join and clean up
|
157 |
-
html_content = "\n".join(html_lines).strip()
|
158 |
-
|
159 |
-
# Remove any trailing text after the last complete HTML tag
|
160 |
-
# Find the last complete tag (either closing tag or self-closing)
|
161 |
-
last_complete_tag = None
|
162 |
-
for match in re.finditer(r"<[^>]+>", html_content):
|
163 |
-
tag = match.group()
|
164 |
-
if (
|
165 |
-
tag.startswith("</")
|
166 |
-
or tag.endswith("/>")
|
167 |
-
or any(
|
168 |
-
void in tag.lower()
|
169 |
-
for void in ["<input", "<img", "<br", "<hr", "<meta", "<link"]
|
170 |
-
)
|
171 |
-
):
|
172 |
-
last_complete_tag = match
|
173 |
-
|
174 |
-
if last_complete_tag:
|
175 |
-
# Keep everything up to and including the last complete tag
|
176 |
-
html_content = html_content[: last_complete_tag.end()]
|
177 |
-
|
178 |
-
return html_content
|
179 |
-
|
180 |
# If no HTML found, return error
|
181 |
raise ValueError("No HTML content found in the response")
|
182 |
|
|
|
94 |
|
95 |
def extract_html_from_response(response_text):
|
96 |
"""Extract HTML code from model response"""
|
97 |
+
|
98 |
+
# Try to find HTML within code blocks first
|
99 |
html_pattern = r"```(?:html)?\s*(.*?)```"
|
100 |
matches = re.findall(html_pattern, response_text, re.DOTALL | re.IGNORECASE)
|
101 |
+
|
102 |
if matches:
|
103 |
return matches[0].strip()
|
104 |
+
|
105 |
+
# If no code blocks but response contains HTML tags, return the whole response
|
106 |
if re.search(r"<[^>]+>", response_text):
|
107 |
+
return response_text.strip()
|
108 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
# If no HTML found, return error
|
110 |
raise ValueError("No HTML content found in the response")
|
111 |
|