hello
Browse files- analyzer.py +146 -31
- app.py +7 -5
analyzer.py
CHANGED
@@ -38,44 +38,159 @@ def analyze_code(code: str) -> str:
|
|
38 |
return response.choices[0].message.content
|
39 |
|
40 |
def parse_llm_json_response(response: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
try:
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
start = response.find('{')
|
46 |
end = response.rfind('}')
|
47 |
-
if start
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
#
|
59 |
-
|
60 |
-
|
61 |
-
#
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
json_str = re.sub(r'"(.*?)"', escape_inner_quotes, json_str)
|
73 |
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
except Exception as e:
|
77 |
-
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
|
81 |
"""
|
|
|
38 |
return response.choices[0].message.content
|
39 |
|
40 |
def parse_llm_json_response(response: str):
    """
    Parse an LLM reply into a dict, trying progressively more lenient strategies.

    Strategies, in order:
      1. Parse the whole (stripped) reply as JSON.
      2. Decode the first JSON object embedded anywhere in the reply.
      3. Repair common syntax slips (single quotes, stray inner quotes,
         trailing commas) and parse again.
      4. Scrape the four expected fields with regular expressions.
      5. Return a placeholder structure that carries an "error" key.

    Args:
        response: Raw text returned by the model. ``None`` is treated as
            empty text instead of raising.

    Returns:
        A dict. Results from strategies 4 and 5 always contain the keys
        "strength", "weaknesses", "speciality" and "relevance rating";
        strategy 5 additionally contains "error".
    """
    text = "" if response is None else str(response)
    logger.info("Attempting to parse LLM response: %s...", text[:100])

    # Strategy 1: Try direct JSON parsing (cleanest case)
    try:
        result = json.loads(text.strip())
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        result = None
    # A bare JSON string/number/list is not a usable analysis result, so only
    # a JSON object short-circuits here; anything else falls through.
    if isinstance(result, dict):
        logger.info("✅ Direct JSON parsing successful")
        return result

    # Strategy 2: Extract JSON block from response
    try:
        result = _decode_first_json_object(text)
    except ValueError as e:
        logger.warning("JSON block extraction failed: %s", e)
    else:
        logger.info("✅ JSON block extraction successful")
        return result

    # Strategy 3: Clean and fix common JSON issues
    start = text.find('{')
    end = text.rfind('}')
    if start != -1 and end > start:
        try:
            result = json.loads(_repair_json_text(text[start:end + 1]))
        except ValueError as e:
            logger.warning("JSON cleaning failed: %s", e)
        else:
            logger.info("✅ JSON cleaning and fixing successful")
            return result

    # Strategy 4: Manual field extraction as last resort
    logger.info("Attempting manual field extraction...")
    result = _extract_fields_manually(text)
    if result is not None:
        logger.info("✅ Manual field extraction successful")
        return result
    logger.warning("Manual extraction failed: %s", "no known fields found")

    # Strategy 5: Complete fallback with placeholder values
    logger.error("All JSON parsing strategies failed, returning empty structure")
    return {
        "strength": "Analysis could not be completed - please try again",
        "weaknesses": "Analysis could not be completed - please try again",
        "speciality": "Analysis could not be completed - please try again",
        "relevance rating": "low",
        "error": f"Failed to parse LLM response after all strategies. Raw: {text[:200]}..."
    }


def _decode_first_json_object(text):
    """Decode the JSON object starting at the first '{' in *text*; raise ValueError otherwise."""
    start = text.find('{')
    if start == -1:
        raise ValueError("No opening brace found")
    # raw_decode understands string literals, so a '}' inside a value does not
    # end the object early, and any text after the object is simply ignored.
    obj, _ = json.JSONDecoder().raw_decode(text, start)
    return obj


def _repair_json_text(json_str):
    """Apply best-effort textual fixes for the JSON mistakes LLMs commonly make."""
    # Single-quoted values (followed by , or }) and keys (followed by :) -> double quotes.
    json_str = re.sub(r"(?<!\\)'([^']*)'(?=\s*[,}])", r'"\1"', json_str)
    json_str = re.sub(r"(?<!\\)'([^']*)'(?=\s*:)", r'"\1"', json_str)
    # Drop a stray unescaped quote inside a value.
    json_str = re.sub(r':\s*"([^"]*)"([^",}]*)"', r': "\1\2"', json_str)
    # Remove trailing commas before a closing brace/bracket.
    json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
    return json_str


def _extract_fields_manually(text):
    """Scrape the four expected fields with regexes; return None when none of them is present."""
    # Quoted-value patterns that tolerate backslash-escaped quotes inside the value.
    double_quoted = r'"((?:[^"\\]|\\.)*)"'
    single_quoted = r"'((?:[^'\\]|\\.)*)'"
    # Field name -> lenient key pattern used when the key itself is unquoted.
    loose_keys = {
        'strength': r'strength',
        'weaknesses': r'weaknesses',
        'speciality': r'speciality',
        'relevance rating': r'relevance[^:]*rating',
    }

    result = {}
    for field, loose_key in loose_keys.items():
        candidates = (
            rf'"{field}"\s*:\s*{double_quoted}',
            rf"'{field}'\s*:\s*{single_quoted}",
            rf'{loose_key}[:\s]+{double_quoted}',
            rf'{loose_key}[:\s]+{single_quoted}',
        )
        value = ""
        for pattern in candidates:
            match = re.search(pattern, text, re.IGNORECASE | re.DOTALL)
            if match:
                # Strip the backslashes that were escaping quotes in the raw text.
                value = re.sub(r'\\+(["\'])', r'\1', match.group(1).strip())
                break
        result[field] = value

    # Nothing recognisable at all: report failure so the caller can fall back
    # to the explicit error structure instead of returning four empty strings.
    if not any(result.values()):
        return None

    result['relevance rating'] = _normalize_rating(result['relevance rating'])
    return result


def _normalize_rating(rating):
    """Map a free-form rating onto 'very low' / 'low' / 'high' / 'very high' (default 'low')."""
    lowered = rating.lower()
    if lowered in ('very low', 'low', 'high', 'very high'):
        return rating
    if 'very' in lowered and 'low' in lowered:
        return 'very low'
    if 'very' in lowered and 'high' in lowered:
        return 'very high'
    if 'low' in lowered:
        return 'low'
    if 'high' in lowered:
        return 'high'
    return 'low'  # Default fallback
|
194 |
|
195 |
def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
|
196 |
"""
|
app.py
CHANGED
@@ -1036,12 +1036,13 @@ def create_ui() -> gr.Blocks:
|
|
1036 |
return gr.update(visible=False), hf_url
|
1037 |
return gr.update(visible=False), ""
|
1038 |
|
1039 |
-
def handle_explore_repo(repo_id: str) -> Tuple[Any, Any]:
|
1040 |
-
"""Handle navigating to the repo explorer
|
1041 |
-
logger.info(f"Switching to repo explorer tab")
|
1042 |
return (
|
1043 |
gr.update(visible=False), # close modal
|
1044 |
-
gr.update(selected="repo_explorer_tab") # switch tab
|
|
|
1045 |
)
|
1046 |
|
1047 |
def handle_cancel_modal() -> Any:
|
@@ -1192,7 +1193,8 @@ def create_ui() -> gr.Blocks:
|
|
1192 |
inputs=[selected_repo_display],
|
1193 |
outputs=[
|
1194 |
repo_action_modal,
|
1195 |
-
tabs
|
|
|
1196 |
],
|
1197 |
js="() => { setTimeout(() => { window.scrollTo({top: 0, behavior: 'smooth'}); window.dispatchEvent(new Event('repoExplorerNavigation')); }, 150); }"
|
1198 |
)
|
|
|
1036 |
return gr.update(visible=False), hf_url
|
1037 |
return gr.update(visible=False), ""
|
1038 |
|
1039 |
+
def handle_explore_repo(repo_id: str) -> Tuple[Any, Any, Any]:
    """Build the three UI updates used when the user chooses to explore a repo.

    Args:
        repo_id: Repository identifier to place in the explorer input; a falsy
            value results in an empty string.

    Returns:
        A 3-tuple of ``gr.update`` objects, in order: hide the modal, select
        the "repo_explorer_tab" tab, and set the explorer input's value.
    """
    logger.info(f"Switching to repo explorer tab with repo: {repo_id}")
    close_modal = gr.update(visible=False)
    switch_tab = gr.update(selected="repo_explorer_tab")
    prefill_input = gr.update(value=repo_id or "")
    return close_modal, switch_tab, prefill_input
|
1047 |
|
1048 |
def handle_cancel_modal() -> Any:
|
|
|
1193 |
inputs=[selected_repo_display],
|
1194 |
outputs=[
|
1195 |
repo_action_modal,
|
1196 |
+
tabs,
|
1197 |
+
repo_components["repo_explorer_input"]
|
1198 |
],
|
1199 |
js="() => { setTimeout(() => { window.scrollTo({top: 0, behavior: 'smooth'}); window.dispatchEvent(new Event('repoExplorerNavigation')); }, 150); }"
|
1200 |
)
|