naman1102 committed on
Commit
b254d56
·
1 Parent(s): b60c631
Files changed (2) hide show
  1. analyzer.py +146 -31
  2. app.py +7 -5
analyzer.py CHANGED
@@ -38,44 +38,159 @@ def analyze_code(code: str) -> str:
38
  return response.choices[0].message.content
39
 
40
  def parse_llm_json_response(response: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  try:
42
- print("DEBUGGGGG ::: ", response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # 1. Extract the JSON object part of the string
 
 
 
 
 
 
 
 
 
45
  start = response.find('{')
46
  end = response.rfind('}')
47
- if start == -1 or end == -1 or end < start:
48
- raise ValueError("No valid JSON object found in the response.")
49
- json_str = response[start:end+1]
50
-
51
- # 2. Replace single quotes used for keys/values with double quotes.
52
- # This handles cases like {'key': 'value'}
53
- json_str = re.sub(r"'", '"', json_str)
54
-
55
- # 3. Find all string values and escape any unescaped double quotes inside them.
56
- # This uses a function as the replacement in re.sub
57
- def escape_inner_quotes(match):
58
- # The match object gives us the full string matched by the regex.
59
- # We take the part between the outer quotes (group 1)
60
- # and replace any \" with a temporary unique placeholder.
61
- # Then, we replace any remaining " with \", and finally
62
- # restore the original escaped quotes.
63
- inner_content = match.group(1)
64
- placeholder = "___TEMP_QUOTE___"
65
- inner_content = inner_content.replace('\\"', placeholder)
66
- inner_content = inner_content.replace('"', '\\"')
67
- inner_content = inner_content.replace(placeholder, '\\"')
68
- return f'"{inner_content}"'
69
-
70
- # This regex finds a double quote, captures everything until the next double quote,
71
- # and then applies the function to that captured group.
72
- json_str = re.sub(r'"(.*?)"', escape_inner_quotes, json_str)
73
 
74
- return json.loads(json_str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  except Exception as e:
77
- print("DEBUGGGGG error ::: ", e)
78
- return {"error": f"Failed to parse JSON: {e}", "raw": response}
 
 
 
 
 
 
 
 
 
79
 
80
  def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
81
  """
 
38
  return response.choices[0].message.content
39
 
40
  def parse_llm_json_response(response: str):
41
+ """
42
+ Robust JSON parser with multiple fallback strategies for LLM responses.
43
+ """
44
+ logger.info(f"Attempting to parse LLM response: {response[:100]}...")
45
+
46
+ # Strategy 1: Try direct JSON parsing (cleanest case)
47
+ try:
48
+ # Clean the response first
49
+ cleaned = response.strip()
50
+ result = json.loads(cleaned)
51
+ logger.info("βœ… Direct JSON parsing successful")
52
+ return result
53
+ except:
54
+ pass
55
+
56
+ # Strategy 2: Extract JSON block from response
57
  try:
58
+ # Find the first complete JSON object
59
+ start = response.find('{')
60
+ if start == -1:
61
+ raise ValueError("No opening brace found")
62
+
63
+ # Find matching closing brace
64
+ brace_count = 0
65
+ end = start
66
+ for i, char in enumerate(response[start:], start):
67
+ if char == '{':
68
+ brace_count += 1
69
+ elif char == '}':
70
+ brace_count -= 1
71
+ if brace_count == 0:
72
+ end = i
73
+ break
74
+
75
+ if brace_count != 0:
76
+ # Fallback to last closing brace
77
+ end = response.rfind('}')
78
+ if end == -1 or end < start:
79
+ raise ValueError("No matching closing brace found")
80
 
81
+ json_str = response[start:end+1]
82
+ result = json.loads(json_str)
83
+ logger.info("βœ… JSON block extraction successful")
84
+ return result
85
+ except Exception as e:
86
+ logger.warning(f"JSON block extraction failed: {e}")
87
+
88
+ # Strategy 3: Clean and fix common JSON issues
89
+ try:
90
+ # Extract JSON part
91
  start = response.find('{')
92
  end = response.rfind('}')
93
+ if start != -1 and end != -1 and end > start:
94
+ json_str = response[start:end+1]
95
+
96
+ # Fix common issues
97
+ # Replace single quotes with double quotes (but be careful with contractions)
98
+ json_str = re.sub(r"(?<!\\)'([^']*)'(?=\s*[,}])", r'"\1"', json_str)
99
+ json_str = re.sub(r"(?<!\\)'([^']*)'(?=\s*:)", r'"\1"', json_str)
100
+
101
+ # Fix unescaped quotes in values
102
+ json_str = re.sub(r':\s*"([^"]*)"([^",}]*)"', r': "\1\2"', json_str)
103
+
104
+ # Remove trailing commas
105
+ json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
106
+
107
+ # Try parsing the cleaned version
108
+ result = json.loads(json_str)
109
+ logger.info("βœ… JSON cleaning and fixing successful")
110
+ return result
111
+ except Exception as e:
112
+ logger.warning(f"JSON cleaning failed: {e}")
113
+
114
+ # Strategy 4: Manual field extraction as last resort
115
+ try:
116
+ logger.info("Attempting manual field extraction...")
117
+ result = {}
 
118
 
119
+ # Extract each field using regex patterns
120
+ patterns = {
121
+ 'strength': [
122
+ r'"strength"\s*:\s*"([^"]*)"',
123
+ r"'strength'\s*:\s*'([^']*)'",
124
+ r'strength[:\s]+"([^"]*)"',
125
+ r'strength[:\s]+\'([^\']*)\''
126
+ ],
127
+ 'weaknesses': [
128
+ r'"weaknesses"\s*:\s*"([^"]*)"',
129
+ r"'weaknesses'\s*:\s*'([^']*)'",
130
+ r'weaknesses[:\s]+"([^"]*)"',
131
+ r'weaknesses[:\s]+\'([^\']*)\''
132
+ ],
133
+ 'speciality': [
134
+ r'"speciality"\s*:\s*"([^"]*)"',
135
+ r"'speciality'\s*:\s*'([^']*)'",
136
+ r'speciality[:\s]+"([^"]*)"',
137
+ r'speciality[:\s]+\'([^\']*)\''
138
+ ],
139
+ 'relevance rating': [
140
+ r'"relevance rating"\s*:\s*"([^"]*)"',
141
+ r"'relevance rating'\s*:\s*'([^']*)'",
142
+ r'relevance[^:]*rating[:\s]+"([^"]*)"',
143
+ r'relevance[^:]*rating[:\s]+\'([^\']*)\''
144
+ ]
145
+ }
146
+
147
+ for field, field_patterns in patterns.items():
148
+ found = False
149
+ for pattern in field_patterns:
150
+ match = re.search(pattern, response, re.IGNORECASE | re.DOTALL)
151
+ if match:
152
+ value = match.group(1).strip()
153
+ # Clean up the extracted value
154
+ value = re.sub(r'\\+(["\'])', r'\1', value) # Remove excessive escaping
155
+ value = value.replace('\\"', '"').replace("\\'", "'")
156
+ result[field] = value
157
+ found = True
158
+ break
159
+
160
+ if not found:
161
+ result[field] = ""
162
+
163
+ # Validate relevance rating
164
+ valid_ratings = ['very low', 'low', 'high', 'very high']
165
+ if result.get('relevance rating', '').lower() not in [r.lower() for r in valid_ratings]:
166
+ # Try to fix common variations
167
+ rating = result.get('relevance rating', '').lower()
168
+ if 'very' in rating and 'low' in rating:
169
+ result['relevance rating'] = 'very low'
170
+ elif 'very' in rating and 'high' in rating:
171
+ result['relevance rating'] = 'very high'
172
+ elif 'low' in rating:
173
+ result['relevance rating'] = 'low'
174
+ elif 'high' in rating:
175
+ result['relevance rating'] = 'high'
176
+ else:
177
+ result['relevance rating'] = 'low' # Default fallback
178
+
179
+ logger.info("βœ… Manual field extraction successful")
180
+ return result
181
 
182
  except Exception as e:
183
+ logger.warning(f"Manual extraction failed: {e}")
184
+
185
+ # Strategy 5: Complete fallback with empty values
186
+ logger.error("All JSON parsing strategies failed, returning empty structure")
187
+ return {
188
+ "strength": "Analysis could not be completed - please try again",
189
+ "weaknesses": "Analysis could not be completed - please try again",
190
+ "speciality": "Analysis could not be completed - please try again",
191
+ "relevance rating": "low",
192
+ "error": f"Failed to parse LLM response after all strategies. Raw: {response[:200]}..."
193
+ }
194
 
195
  def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
196
  """
app.py CHANGED
@@ -1036,12 +1036,13 @@ def create_ui() -> gr.Blocks:
1036
  return gr.update(visible=False), hf_url
1037
  return gr.update(visible=False), ""
1038
 
1039
def handle_explore_repo(repo_id: str) -> Tuple[Any, Any]:
    """Handle navigating to the repo explorer - simple version.

    Args:
        repo_id: Value passed in by the event binding; this version of the
            handler does not use it.

    Returns:
        Two ``gr.update`` objects: the first sets ``visible=False`` (closing
        the modal), the second selects the ``"repo_explorer_tab"`` tab.
    """
    # Plain string: the message has no placeholders, so no f-prefix is needed.
    logger.info("Switching to repo explorer tab")
    return (
        gr.update(visible=False),  # close modal
        gr.update(selected="repo_explorer_tab"),  # switch tab
    )
1046
 
1047
  def handle_cancel_modal() -> Any:
@@ -1192,7 +1193,8 @@ def create_ui() -> gr.Blocks:
1192
  inputs=[selected_repo_display],
1193
  outputs=[
1194
  repo_action_modal,
1195
- tabs
 
1196
  ],
1197
  js="() => { setTimeout(() => { window.scrollTo({top: 0, behavior: 'smooth'}); window.dispatchEvent(new Event('repoExplorerNavigation')); }, 150); }"
1198
  )
 
1036
  return gr.update(visible=False), hf_url
1037
  return gr.update(visible=False), ""
1038
 
1039
def handle_explore_repo(repo_id: str) -> Tuple[Any, Any, Any]:
    """Navigate to the repo explorer and hand it the selected repo ID.

    Args:
        repo_id: Repository ID to place in the explorer input; a falsy value
            is sent as an empty string.

    Returns:
        Three ``gr.update`` objects, in output order: hide the modal, select
        the ``"repo_explorer_tab"`` tab, and set the value to ``repo_id``.
    """
    logger.info(f"Switching to repo explorer tab with repo: {repo_id}")

    close_modal = gr.update(visible=False)
    switch_tab = gr.update(selected="repo_explorer_tab")
    set_repo_id = gr.update(value=repo_id or "")
    return close_modal, switch_tab, set_repo_id
1047
 
1048
  def handle_cancel_modal() -> Any:
 
1193
  inputs=[selected_repo_display],
1194
  outputs=[
1195
  repo_action_modal,
1196
+ tabs,
1197
+ repo_components["repo_explorer_input"]
1198
  ],
1199
  js="() => { setTimeout(() => { window.scrollTo({top: 0, behavior: 'smooth'}); window.dispatchEvent(new Event('repoExplorerNavigation')); }, 150); }"
1200
  )