# analyzer.py
import time
import logging

import torch

# Configure module logger (guarded so re-imports don't attach duplicate handlers)
logger = logging.getLogger("CodeAnalyzer")
logger.setLevel(logging.INFO)
if not logger.handlers:
    handler = logging.StreamHandler()
    formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] - %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)


def analyze_code(tokenizer, model, language, code):
    """
    Analyze and fix buggy code using a CodeT5+ model with the 'fix:' prompt prefix.
    Works across multiple programming languages.
    """
    start_time = time.time()

    # Prepare the prompt in CodeT5+ style
    prompt = f"fix: {code.strip()}"
    logger.info(f"🔍 Starting analysis for language: {language}")
    logger.info(f"🧾 Prompt: {prompt[:80]}...")

    try:
        # Tokenize the prompt and generate the fixed code
        inputs = tokenizer(
            prompt, return_tensors="pt", truncation=True, max_length=512
        ).to(model.device)
        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=1024)

        # Decode the generated tokens, skipping special tokens
        response = tokenizer.decode(output[0], skip_special_tokens=True).strip()

        elapsed = round(time.time() - start_time, 2)
        logger.info(f"✅ Inference completed in {elapsed}s")

        return {
            "bug_analysis": [],  # Optional: heuristics could populate this (see sketch below)
            "corrected_code": response,
        }

    except Exception as e:
        logger.error(f"❌ Error during analysis: {e}")
        return {
            "bug_analysis": [{
                "line_number": 0,
                "error_message": "Inference failed",
                "explanation": str(e),
                "fix_suggestion": "Try again with simpler code or retry later",
            }],
            "corrected_code": code,
        }
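
# The 'bug_analysis' list above is returned empty. A minimal sketch of one way
# to populate it -- an illustrative assumption, not part of the original module --
# is a line-level diff between the input code and the model's corrected output:
import difflib


def diff_bug_analysis(original, corrected):
    """Heuristic sketch: flag each original line the model changed or removed.

    This only detects *where* the code changed, not *why*; the field values
    below are placeholders chosen for illustration.
    """
    analysis = []
    matcher = difflib.SequenceMatcher(
        None, original.splitlines(), corrected.splitlines()
    )
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag in ("replace", "delete"):
            for offset in range(i2 - i1):
                analysis.append({
                    "line_number": i1 + offset + 1,  # 1-based line numbers
                    "error_message": "Line modified by model",
                    "explanation": "The corrected code rewrites or removes this line.",
                    "fix_suggestion": "See 'corrected_code' for the replacement.",
                })
    return analysis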
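
# Example driver -- a minimal sketch of how analyze_code is expected to be
# called. The checkpoint name is an assumption: any seq2seq CodeT5+ variant
# loadable via AutoModelForSeq2SeqLM should work here.
if __name__ == "__main__":
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    checkpoint = "Salesforce/codet5p-220m"  # assumed checkpoint, swap as needed
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    model.to("cuda" if torch.cuda.is_available() else "cpu")

    buggy = "def add(a, b):\n    return a - b"  # toy buggy snippet
    result = analyze_code(tokenizer, model, "Python", buggy)
    print(result["corrected_code"])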