import json
import os
import re

from openai import OpenAI


def analyze_code(code: str) -> str:
    """
    Sends the given code to the configured chat-completions endpoint
    (a Qwen2.5-Coder-7B-Instruct-AWQ deployment) and returns the model's
    analysis as a string.
    """
    client = OpenAI(
        api_key=os.getenv("modal_api"),
        base_url=os.getenv("base_url"),
    )
    system_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the code given to you. "
        "Your ONLY output must be a valid JSON object with the following keys: "
        "'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Do NOT include any explanation, markdown, or text outside the JSON. "
        "Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        "{\n 'strength': '...', \n 'weaknesses': '...', \n 'speciality': '...', \n 'relevance rating': '...'\n}"
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return response.choices[0].message.content


def parse_llm_json_response(response: str):
    """
    Extracts the first JSON object found in an LLM response and parses it.
    Returns an error dict containing the raw response if parsing fails.
    """
    try:
        # Debug aid: surface the raw LLM output before parsing.
        print("parse_llm_json_response raw response:", response)
        # Keep only the substring between the first '{' and the last '}'.
        start = response.find('{')
        end = response.rfind('}')
        if start != -1 and end != -1 and end > start:
            json_str = response[start:end + 1]
        else:
            json_str = response
        # Heuristic: convert unescaped single quotes to double quotes so that
        # Python-style dicts parse as JSON. This can mangle values containing
        # apostrophes, but works for the constrained outputs requested above.
        json_str = re.sub(r"(?<!\\)'", '"', json_str)
        return json.loads(json_str)
    except Exception as e:
        print("parse_llm_json_response parse error:", e)
        return {"error": f"Failed to parse JSON: {e}", "raw": response}


def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
    """
    Combines all .py and .md files in the given directory (recursively) into a
    single text file. Returns the path to the combined file.
    """
    combined_content = []
    seen_files = set()
    # Include priority files first so they appear at the top of the combined file.
    priority_files = ["app.py", "README.md"]
    for pf in priority_files:
        pf_path = os.path.join(repo_dir, pf)
        if os.path.isfile(pf_path):
            try:
                with open(pf_path, "r", encoding="utf-8") as f:
                    combined_content.append(f"\n# ===== File: {pf} =====\n")
                    combined_content.append(f.read())
                seen_files.add(os.path.abspath(pf_path))
            except Exception as e:
                combined_content.append(f"\n# Could not read {pf_path}: {e}\n")
    # Then walk the tree for all remaining .py and .md files.
    for root, _, files in os.walk(repo_dir):
        for file in files:
            if file.endswith(".py") or file.endswith(".md"):
                file_path = os.path.join(root, file)
                abs_path = os.path.abspath(file_path)
                if abs_path in seen_files:
                    continue
                try:
                    with open(file_path, "r", encoding="utf-8") as f:
                        combined_content.append(f"\n# ===== File: {file} =====\n")
                        combined_content.append(f.read())
                    seen_files.add(abs_path)
                except Exception as e:
                    combined_content.append(f"\n# Could not read {file_path}: {e}\n")
    with open(output_file, "w", encoding="utf-8") as out_f:
        out_f.write("\n".join(combined_content))
    return output_file
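
# Example usage (a sketch, assuming a local "repo_files" directory exists):
#
#     combined_path = combine_repo_files_for_llm("repo_files", "combined_repo.txt")
#     print(f"Combined sources written to {combined_path}")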


def analyze_code_chunk(code: str) -> str:
    """
    Analyzes a single chunk of code and returns the model's JSON summary for
    that chunk as a string.
    """
    client = OpenAI(
        api_key=os.getenv("modal_api"),
        base_url=os.getenv("base_url"),
    )
    chunk_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the following code chunk. "
        "Your ONLY output must be a valid JSON object with the following keys: "
        "'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        'All property names and string values MUST use double quotes ("). Do NOT use single quotes. '
        "Do NOT include any explanation, markdown, or text outside the JSON. "
        "Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": chunk_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return response.choices[0].message.content


def aggregate_chunk_analyses(chunk_jsons: list) -> str:
    """
    Aggregates a list of per-chunk JSON analyses into a single JSON summary
    using the LLM.
    """
    client = OpenAI(
        api_key=os.getenv("modal_api"),
        base_url=os.getenv("base_url"),
    )
    aggregation_prompt = (
        "You are a highly precise and strict code analyzer and JSON generator. "
        "You are given a list of JSON analyses of code chunks. "
        "Aggregate these into a SINGLE overall JSON summary with the same keys: "
        "'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        'All property names and string values MUST use double quotes ("). Do NOT use single quotes. '
        "Summarize and combine the information from all chunks. "
        "Do NOT include any explanation, markdown, or text outside the JSON. "
        "If a key is missing in all chunks, use an empty string. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": aggregation_prompt},
            {"role": "user", "content": user_content},
        ],
        max_tokens=512,
        temperature=0.3,
    )
    return response.choices[0].message.content
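
# Manual chunk flow (a sketch with hypothetical one-line chunks): analyze each
# chunk separately, then merge the per-chunk JSONs into one summary:
#
#     chunks = ["def f():\n    pass", "def g():\n    pass"]
#     chunk_jsons = [analyze_code_chunk(c) for c in chunks]
#     merged = parse_llm_json_response(aggregate_chunk_analyses(chunk_jsons))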


def analyze_combined_file(output_file="combined_repo.txt"):
    """
    Reads the combined file, splits it into 500-line chunks, analyzes each
    chunk, and aggregates the results. Returns a single string containing the
    per-chunk JSONs (for debugging) followed by the final aggregated summary.
    """
    try:
        with open(output_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
        chunk_size = 500
        chunk_jsons = []
        for i in range(0, len(lines), chunk_size):
            chunk = "".join(lines[i:i + chunk_size])
            chunk_jsons.append(analyze_code_chunk(chunk))
        final_summary = aggregate_chunk_analyses(chunk_jsons)
        debug_output = (
            "==== Chunk JSON Outputs ====\n"
            + "\n\n".join(
                f"Chunk {i + 1} JSON:\n{cj}" for i, cj in enumerate(chunk_jsons)
            )
            + "\n\n==== Final Aggregated Summary ===="
            + f"\n{final_summary}"
        )
        return debug_output
    except Exception as e:
        return f"Error analyzing combined file: {e}"