victor HF Staff committed on
Commit
bdd9f2a
·
1 Parent(s): d2e2bdc

Refactor HTML extraction logic to streamline response handling and improve error messaging

Browse files
Files changed (1) hide show
  1. app.py +7 -78
app.py CHANGED
@@ -94,89 +94,18 @@ Focus on creating clean, spacious, and well-proportioned designs that feel premi
94
 
95
  def extract_html_from_response(response_text):
96
  """Extract HTML code from model response"""
97
-
98
- # First, try to find HTML within code blocks
99
  html_pattern = r"```(?:html)?\s*(.*?)```"
100
  matches = re.findall(html_pattern, response_text, re.DOTALL | re.IGNORECASE)
101
-
102
  if matches:
103
  return matches[0].strip()
104
-
105
- # If no code blocks, look for HTML tags in the response
106
  if re.search(r"<[^>]+>", response_text):
107
- # Find the first HTML tag
108
- html_start = re.search(r"<", response_text)
109
- if not html_start:
110
- raise ValueError("No HTML content found in the response")
111
-
112
- # Start from the first HTML tag
113
- content_from_first_tag = response_text[html_start.start() :]
114
-
115
- # Split into lines and process
116
- lines = content_from_first_tag.split("\n")
117
- html_lines = []
118
-
119
- for line in lines:
120
- line_stripped = line.strip()
121
-
122
- # Skip empty lines at the beginning
123
- if not line_stripped and not html_lines:
124
- continue
125
-
126
- # Stop if we hit obvious explanation text after we have HTML
127
- if html_lines and line_stripped:
128
- # Check if this line looks like explanation rather than HTML
129
- if not re.search(r"<[^>]*>", line_stripped):
130
- # If it's a long explanatory sentence, stop here
131
- if len(line_stripped) > 80 or any(
132
- phrase in line_stripped.lower()
133
- for phrase in [
134
- "this creates",
135
- "this provides",
136
- "the form",
137
- "this design",
138
- "here's",
139
- "this code",
140
- "explanation:",
141
- "note:",
142
- "features:",
143
- "improvements:",
144
- ]
145
- ):
146
- break
147
-
148
- html_lines.append(line.rstrip())
149
-
150
- # If we have HTML and hit a line that ends with a period and looks like explanation, stop
151
- if html_lines and line_stripped.endswith(".") and len(line_stripped) > 40:
152
- if not re.search(r"<[^>]*>", line_stripped):
153
- html_lines.pop() # Remove the explanatory line
154
- break
155
-
156
- # Join and clean up
157
- html_content = "\n".join(html_lines).strip()
158
-
159
- # Remove any trailing text after the last complete HTML tag
160
- # Find the last complete tag (either closing tag or self-closing)
161
- last_complete_tag = None
162
- for match in re.finditer(r"<[^>]+>", html_content):
163
- tag = match.group()
164
- if (
165
- tag.startswith("</")
166
- or tag.endswith("/>")
167
- or any(
168
- void in tag.lower()
169
- for void in ["<input", "<img", "<br", "<hr", "<meta", "<link"]
170
- )
171
- ):
172
- last_complete_tag = match
173
-
174
- if last_complete_tag:
175
- # Keep everything up to and including the last complete tag
176
- html_content = html_content[: last_complete_tag.end()]
177
-
178
- return html_content
179
-
180
  # If no HTML found, return error
181
  raise ValueError("No HTML content found in the response")
182
 
 
94
 
95
  def extract_html_from_response(response_text):
96
  """Extract HTML code from model response"""
97
+
98
+ # Try to find HTML within code blocks first
99
  html_pattern = r"```(?:html)?\s*(.*?)```"
100
  matches = re.findall(html_pattern, response_text, re.DOTALL | re.IGNORECASE)
101
+
102
  if matches:
103
  return matches[0].strip()
104
+
105
+ # If no code blocks but response contains HTML tags, return the whole response
106
  if re.search(r"<[^>]+>", response_text):
107
+ return response_text.strip()
108
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  # If no HTML found, return error
110
  raise ValueError("No HTML content found in the response")
111