Sushwetabm committed on
Commit
9a2b71a
·
1 Parent(s): aff0b1f

updated analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +23 -20
analyzer.py CHANGED
@@ -192,6 +192,7 @@
192
  # "error_type": type(e).__name__
193
  # }
194
 
 
195
  # analyzer.py
196
 
197
  import torch
@@ -207,42 +208,44 @@ formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] - %(message)s")
207
  handler.setFormatter(formatter)
208
  logger.addHandler(handler)
209
 
210
-
211
def analyze_code(tokenizer, model, language, code):
    """
    Ask the model to repair *code* and explain the bugs, returning parsed JSON.

    Args:
        tokenizer: HuggingFace-style tokenizer; called to encode the prompt and
            used via .decode() on the generated ids.
        model: seq2seq model exposing .generate(**inputs, ...) and .device.
        language: programming-language name interpolated into the prompt.
        code: the buggy source code to analyze.

    Returns:
        dict with keys "bug_analysis" (list of findings) and "corrected_code"
        (str). On any failure (generation or JSON parsing), a single synthetic
        finding is returned and "corrected_code" falls back to the input code.
    """
    start_time = time.time()  # kept for parity with the rest of the file; not logged here

    prompt = (
        f"Language: {language}\n"
        f"Task: Fix the following buggy code and explain the bugs.\n"
        f"Input Code:\n{code.strip()}\n\n"
        f"Respond with a JSON like this:\n"
        f"{{\n"
        f" \"bug_analysis\": [{{\"line_number\": X, \"error_message\": \"...\", \"explanation\": \"...\", \"fix_suggestion\": \"...\"}}],\n"
        f" \"corrected_code\": \"...\"\n"
        f"}}"
    )

    try:
        # Truncate long inputs to the model's 512-token context window.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)
        output = model.generate(**inputs, max_new_tokens=1024)
        response = tokenizer.decode(output[0], skip_special_tokens=True)

        # Remove Markdown code fences if present.
        # BUG FIX: the previous .strip("```json").strip("```") treated the
        # argument as a *character set*, so leading/trailing backticks and the
        # letters j/s/o/n could be eaten from the JSON payload itself. Remove
        # the exact fence substrings instead.
        cleaned = response.strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[len("```json"):]
        elif cleaned.startswith("```"):
            cleaned = cleaned[len("```"):]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-len("```")]
        cleaned = cleaned.strip()

        json_output = json.loads(cleaned)

        return {
            "bug_analysis": json_output.get("bug_analysis", []),
            "corrected_code": json_output.get("corrected_code", code)
        }

    except Exception as e:
        # Best-effort fallback: surface the failure as a synthetic finding
        # instead of raising, and hand the original code back unchanged.
        return {
            "bug_analysis": [{
                "line_number": 0,
                "error_message": "Failed to parse",
                "explanation": str(e),
                "fix_suggestion": "Try simplifying the code."
            }],
            "corrected_code": code
        }
 
192
  # "error_type": type(e).__name__
193
  # }
194
 
195
+ # analyzer.py
196
  # analyzer.py
197
 
198
  import torch
 
208
  handler.setFormatter(formatter)
209
  logger.addHandler(handler)
210
 
 
211
def analyze_code(tokenizer, model, language, code):
    """
    Analyze and fix buggy code using a CodeT5+-style model with a 'fix:' prompt prefix.

    Args:
        tokenizer: HuggingFace-style tokenizer; called to encode the prompt and
            used via .decode() on the generated ids.
        model: seq2seq model exposing .generate(**inputs, ...) and .device.
        language: programming-language name (used for logging only).
        code: the buggy source code to repair.

    Returns:
        dict with keys "bug_analysis" (list; currently always empty on success)
        and "corrected_code" (str). On any failure a single synthetic finding is
        returned and "corrected_code" falls back to the input code.
    """
    start_time = time.time()

    # CodeT5+-style refinement prompt: a plain "fix: <code>" prefix.
    prompt = f"fix: {code.strip()}"

    # Lazy %-formatting: the message is only built if the log level is enabled.
    # NOTE(review): emoji restored from mojibake in the diff view — confirm
    # against the repository's actual file encoding.
    logger.info("🔍 Starting analysis for language: %s", language)
    logger.info("🧾 Prompt: %.80s...", prompt)

    try:
        # Tokenize; truncate long inputs to the model's 512-token context.
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)

        # Inference only — no_grad avoids building the autograd graph.
        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=1024)

        response = tokenizer.decode(output[0], skip_special_tokens=True).strip()

        elapsed = round(time.time() - start_time, 2)
        logger.info("✅ Inference completed in %ss", elapsed)

        return {
            "bug_analysis": [],  # Optional: heuristics could populate this later
            "corrected_code": response
        }

    except Exception as e:
        # logger.exception records the full traceback, not just the message.
        logger.exception("❌ Error during analysis: %s", e)
        return {
            "bug_analysis": [{
                "line_number": 0,
                "error_message": "Inference failed",
                "explanation": str(e),
                "fix_suggestion": "Try again with simpler code or retry later"
            }],
            "corrected_code": code
        }