import json
import os

from openai import OpenAI


def analyze_code(code: str) -> str:
    """
    Sends the given code to an OpenAI-compatible endpoint (here a
    Qwen2.5-Coder-7B-Instruct-AWQ deployment) for analysis.
    Returns the analysis as a string.
    """
    client = OpenAI(api_key=os.getenv("modal_api"), base_url=os.getenv("base_url"))
    system_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the code given to you. "
        "Your ONLY output must be a valid JSON object with the following keys: "
        "'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Do NOT include any explanation, markdown, or text outside the JSON. "
        "Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        '{\n  "strength": "...",\n  "weaknesses": "...",\n  "speciality": "...",\n  "relevance rating": "..."\n}'
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return response.choices[0].message.content
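

# Example usage (a minimal sketch): assumes the "modal_api" and "base_url"
# environment variables point at a running OpenAI-compatible endpoint that
# serves the model named above.
def _example_analyze_code():
    sample = "def add(a, b):\n    return a + b"
    raw = analyze_code(sample)           # raw model output, expected to be JSON
    return parse_llm_json_response(raw)  # dict with the four expected keys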


def parse_llm_json_response(response: str):
    """
    Extracts and parses the first JSON object found in an LLM response.
    Falls back to parsing the raw string; returns an error dict on failure.
    """
    try:
        # Extract only the substring between the first '{' and the last '}'
        start = response.find('{')
        end = response.rfind('}')
        if start != -1 and end != -1 and end > start:
            json_str = response[start:end + 1]
        else:
            json_str = response
        return json.loads(json_str)
    except Exception as e:
        return {"error": f"Failed to parse JSON: {e}", "raw": response}
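
# Example of the fallback behavior: any text around the outermost braces is
# stripped before json.loads runs, so prose-wrapped model output still parses:
#   parse_llm_json_response('Sure! {"strength": "fast", "weaknesses": ""}')
#   -> {'strength': 'fast', 'weaknesses': ''}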


def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
    """
    Combines all .py and .md files in the given directory (recursively) into a
    single text file, with app.py and README.md placed first.
    Returns the path to the combined file.
    """
    combined_content = []
    seen_files = set()
    # Priority files
    priority_files = ["app.py", "README.md"]
    for pf in priority_files:
        pf_path = os.path.join(repo_dir, pf)
        if os.path.isfile(pf_path):
            try:
                with open(pf_path, "r", encoding="utf-8") as f:
                    combined_content.append(f"\n# ===== File: {pf} =====\n")
                    combined_content.append(f.read())
                seen_files.add(os.path.abspath(pf_path))
            except Exception as e:
                combined_content.append(f"\n# Could not read {pf_path}: {e}\n")
    # All other .py and .md files
    for root, _, files in os.walk(repo_dir):
        for file in files:
            if file.endswith(".py") or file.endswith(".md"):
                file_path = os.path.join(root, file)
                abs_path = os.path.abspath(file_path)
                if abs_path in seen_files:
                    continue
                try:
                    with open(file_path, "r", encoding="utf-8") as f:
                        combined_content.append(f"\n# ===== File: {file} =====\n")
                        combined_content.append(f.read())
                    seen_files.add(abs_path)
                except Exception as e:
                    combined_content.append(f"\n# Could not read {file_path}: {e}\n")
    with open(output_file, "w", encoding="utf-8") as out_f:
        out_f.write("\n".join(combined_content))
    return output_file
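
# Example usage (a minimal sketch, assuming a "repo_files" directory with
# .py/.md files sits next to this script):
#   combined_path = combine_repo_files_for_llm("repo_files")
#   with open(combined_path, encoding="utf-8") as f:
#       print(f.read()[:200])  # preview the combined text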


def analyze_code_chunk(code: str) -> str:
    """
    Analyzes a code chunk and returns a JSON summary for that chunk.
    """
    client = OpenAI(api_key=os.getenv("modal_api"), base_url=os.getenv("base_url"))
    chunk_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the following code chunk. "
        "Your ONLY output must be a valid JSON object with the following keys: "
        "'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Do NOT include any explanation, markdown, or text outside the JSON. "
        "Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        '{\n  "strength": "...",\n  "weaknesses": "...",\n  "speciality": "...",\n  "relevance rating": "..."\n}'
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": chunk_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return response.choices[0].message.content


def aggregate_chunk_analyses(chunk_jsons: list) -> str:
    """
    Aggregates a list of chunk JSONs into a single JSON summary using the LLM.
    """
    client = OpenAI(api_key=os.getenv("modal_api"), base_url=os.getenv("base_url"))
    aggregation_prompt = (
        "You are a highly precise and strict JSON generator. You are given a list of JSON analyses of code chunks. "
        "Aggregate these into a SINGLE overall JSON summary with the same keys: "
        "'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Summarize and combine the information from all chunks. "
        "Do NOT include any explanation, markdown, or text outside the JSON. "
        "If a key is missing in all chunks, use an empty string. "
        "Example of the ONLY valid output:\n"
        '{\n  "strength": "...",\n  "weaknesses": "...",\n  "speciality": "...",\n  "relevance rating": "..."\n}'
    )
    user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": aggregation_prompt},
            {"role": "user", "content": user_content},
        ],
        max_tokens=512,
        temperature=0.3,
    )
    return response.choices[0].message.content


def analyze_combined_file(output_file="combined_repo.txt"):
    """
    Reads the combined file, splits it into 500-line chunks, analyzes each
    chunk, and aggregates the per-chunk output into a final summary.
    Returns a single string containing each chunk's JSON (for debugging)
    followed by the aggregated summary.
    """
    try:
        with open(output_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
        chunk_size = 500
        chunk_jsons = []
        for i in range(0, len(lines), chunk_size):
            chunk = "".join(lines[i:i + chunk_size])
            chunk_jsons.append(analyze_code_chunk(chunk))
        final_summary = aggregate_chunk_analyses(chunk_jsons)
        debug_output = (
            "==== Chunk JSON Outputs ====\n\n"
            + "\n\n".join(f"Chunk {i} JSON:\n{cj}" for i, cj in enumerate(chunk_jsons, 1))
            + "\n\n==== Final Aggregated Summary ====\n"
            + final_summary
        )
        return debug_output
    except Exception as e:
        return f"Error analyzing combined file: {e}"