"""LLM-based code analysis helpers.

Each function asks an OpenAI-compatible endpoint to analyze code and return a
strict JSON object with the keys 'strength', 'weaknesses', 'speciality', and
'relevance rating'. Large inputs are split into 500-line chunks, analyzed
per-chunk, then aggregated into one summary.
"""

import json
import os
import re

# NOTE(review): this top-level import is unused here (every function imports
# OpenAI lazily), but it is kept in case unseen code in this file relies on it.
import openai


def _make_client():
    """Build an OpenAI-compatible client from environment configuration.

    Reads the API key from the ``modal_api`` env var and the endpoint URL
    from the ``base_url`` env var.
    """
    from openai import OpenAI

    client = OpenAI(api_key=os.getenv("modal_api"))
    client.base_url = os.getenv("base_url")
    return client


def analyze_code(code: str) -> str:
    """Analyze *code* with the LLM and return its raw response string.

    The system prompt instructs the model to emit ONLY a JSON object with the
    keys 'strength', 'weaknesses', 'speciality', 'relevance rating'.
    """
    client = _make_client()
    system_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the code given to you. "
        "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        "{\n 'strength': '...', \n 'weaknesses': '...', \n 'speciality': '...', \n 'relevance rating': '...'\n}"
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",  # Updated model
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return response.choices[0].message.content


def parse_llm_json_response(response: str):
    """Extract and parse the JSON object embedded in a raw LLM response.

    Takes the substring between the first '{' and the last '}', normalizes
    single quotes to double quotes, and parses it. On any failure, returns a
    dict with the expected keys and an error note instead of raising.
    """
    try:
        # Extract only the substring between the first '{' and the last '}'
        start = response.find('{')
        end = response.rfind('}')
        if start != -1 and end != -1 and end > start:
            json_str = response[start:end + 1]
        else:
            json_str = response
        # Replace single quotes with double quotes for JSON keys/values.
        # NOTE(review): the original regex and parse tail were garbled in the
        # source; this reconstruction converts unescaped single quotes so the
        # model's pseudo-JSON parses — confirm against version history.
        json_str = re.sub(r"(?<!\\)'", '"', json_str)
        return json.loads(json_str)
    except Exception as e:
        # NOTE(review): reconstructed fallback — always hand callers the four
        # expected keys rather than propagating the parse failure.
        return {
            "strength": "",
            "weaknesses": "",
            "speciality": "",
            "relevance rating": f"Parse error: {e}",
        }


def analyze_code_chunk(code: str) -> str:
    """Analyze a single code chunk and return the raw JSON-string response."""
    client = _make_client()
    chunk_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the following code chunk. "
        "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
        "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": chunk_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return response.choices[0].message.content


def aggregate_chunk_analyses(chunk_jsons: list) -> str:
    """Aggregate per-chunk JSON analyses into a single JSON summary via the LLM.

    Lower temperature (0.3) than the per-chunk calls, since aggregation should
    be more deterministic.
    """
    client = _make_client()
    aggregation_prompt = (
        "You are a highly precise and strict, code analyzer and JSON generator. You are given a list of JSON analyses of code chunks. "
        "Aggregate these into a SINGLE overall JSON summary with the same keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
        "Summarize and combine the information from all chunks. Do NOT include any explanation, markdown, or text outside the JSON. "
        "If a key is missing in all chunks, use an empty string. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": aggregation_prompt},
            {"role": "user", "content": user_content},
        ],
        max_tokens=512,
        temperature=0.3,
    )
    return response.choices[0].message.content


def analyze_combined_file(output_file="combined_repo.txt"):
    """Analyze *output_file* in 500-line chunks and aggregate the results.

    Reads the combined file, analyzes each chunk with the LLM, then asks the
    LLM to merge the per-chunk JSONs. Returns a debug string containing every
    chunk's JSON followed by the final aggregated summary; on any error,
    returns an error message string instead of raising.
    """
    try:
        with open(output_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
        chunk_size = 500
        chunk_jsons = []
        for i in range(0, len(lines), chunk_size):
            chunk = "".join(lines[i:i + chunk_size])
            chunk_jsons.append(analyze_code_chunk(chunk))
        final_summary = aggregate_chunk_analyses(chunk_jsons)
        debug_output = (
            "==== Chunk JSON Outputs ===="
            + "\n\n".join(
                f"Chunk {idx + 1} JSON:\n{cj}" for idx, cj in enumerate(chunk_jsons)
            )
            + "\n\n==== Final Aggregated Summary ===="
            + f"\n{final_summary}"
        )
        return debug_output
    except Exception as e:
        return f"Error analyzing combined file: {e}"