File size: 8,829 Bytes
e0b6f12 1a943f1 f03a154 e0b6f12 27f4250 9a88164 1a943f1 2c15ffb 1a943f1 27f4250 1d3eed5 e0b6f12 27f4250 5b7f342 1a943f1 82d6e3b 9f8e537 82d6e3b 9f8e537 82d6e3b 1a943f1 f03a154 1a943f1 5b7f342 631e1ee 5b7f342 631e1ee 5b7f342 791be58 5b7f342 631e1ee 5b7f342 791be58 1c90111 f03a154 1c90111 f03a154 1c90111 f03a154 1c90111 f03a154 1c90111 f03a154 1c90111 791be58 1c90111 e9db129 791be58 631e1ee 2c15ffb 1c90111 2c15ffb 1c90111 e9db129 791be58 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
import ast
import json
import os
import re

import openai
def analyze_code(code: str) -> str:
    """
    Analyze *code* with the configured OpenAI-compatible chat endpoint.

    Sends the code as the user message under a strict JSON-only system
    prompt and returns the model's raw text reply, which the prompt
    constrains to a JSON object with the keys 'strength', 'weaknesses',
    'speciality' and 'relevance rating'.

    Raises whatever the OpenAI client raises on network/auth failure.
    """
    from openai import OpenAI

    # NOTE(review): credentials and endpoint come from the 'modal_api'
    # and 'base_url' environment variables -- confirm they are set upstream.
    client = OpenAI(api_key=os.getenv("modal_api"))
    client.base_url = os.getenv("base_url")
    # The example now uses double quotes: the previous single-quoted
    # example encouraged the model to emit invalid JSON, which the parser
    # then had to repair heuristically. Kept consistent with the prompt
    # in analyze_code_chunk.
    system_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the code given to you. "
        "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
        "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",  # Updated model
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return response.choices[0].message.content
def parse_llm_json_response(response: str):
    """
    Extract and parse the outermost {...} JSON object from an LLM response.

    Parsing strategy, from strictest to loosest:
      1. ``json.loads`` on the raw substring -- valid JSON passes through
         untouched. (The old code blindly rewrote every single quote to a
         double quote first, which corrupted valid JSON whose string
         values contained apostrophes.)
      2. ``ast.literal_eval`` for Python-dict style output, e.g.
         ``{'key': 'value'}``.
      3. The original quote-rewriting heuristic as a last resort.

    Returns the parsed dict on success, or
    ``{"error": ..., "raw": response}`` on any failure (never raises).
    """
    try:
        # Locate the outermost brace pair; everything outside is discarded.
        start = response.find('{')
        end = response.rfind('}')
        if start == -1 or end == -1 or end < start:
            raise ValueError("No valid JSON object found in the response.")
        json_str = response[start:end + 1]

        # 1. Fast path: the model produced valid JSON already.
        try:
            return json.loads(json_str)
        except json.JSONDecodeError:
            pass

        # 2. Python-literal style ({'key': 'value'}) parses safely here.
        try:
            parsed = ast.literal_eval(json_str)
            if isinstance(parsed, dict):
                return parsed
        except (ValueError, SyntaxError):
            pass

        # 3. Heuristic repair: swap quote styles, then re-escape any
        #    double quotes that end up inside string values.
        json_str = re.sub(r"'", '"', json_str)

        def escape_inner_quotes(match):
            # Protect already-escaped quotes with a placeholder, escape
            # the rest, then restore the placeholder as an escaped quote.
            inner_content = match.group(1)
            placeholder = "___TEMP_QUOTE___"
            inner_content = inner_content.replace('\\"', placeholder)
            inner_content = inner_content.replace('"', '\\"')
            inner_content = inner_content.replace(placeholder, '\\"')
            return f'"{inner_content}"'

        json_str = re.sub(r'"(.*?)"', escape_inner_quotes, json_str)
        return json.loads(json_str)
    except Exception as e:
        # Callers expect an error dict rather than an exception.
        return {"error": f"Failed to parse JSON: {e}", "raw": response}
def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
    """
    Concatenate every .py and .md file under *repo_dir* into *output_file*.

    "app.py" and "README.md" at the top of *repo_dir* are emitted first,
    then all remaining .py/.md files found by a recursive walk. Each file
    is preceded by a "# ===== File: <name> =====" banner; an unreadable
    file is recorded as a comment instead of aborting the run.

    Returns the path to the combined file.
    """
    sections = []
    already_added = set()

    def add_file(path, label):
        # Append one file's banner and contents; never let a bad file stop the scan.
        try:
            with open(path, "r", encoding="utf-8") as handle:
                sections.append(f"\n# ===== File: {label} =====\n")
                sections.append(handle.read())
            already_added.add(os.path.abspath(path))
        except Exception as e:
            sections.append(f"\n# Could not read {path}: {e}\n")

    # Emit the high-signal entry points first.
    for name in ("app.py", "README.md"):
        candidate = os.path.join(repo_dir, name)
        if os.path.isfile(candidate):
            add_file(candidate, name)

    # Then everything else, skipping what was already emitted above.
    for root, _, filenames in os.walk(repo_dir):
        for filename in filenames:
            if not filename.endswith((".py", ".md")):
                continue
            full_path = os.path.join(root, filename)
            if os.path.abspath(full_path) in already_added:
                continue
            add_file(full_path, filename)

    with open(output_file, "w", encoding="utf-8") as out_handle:
        out_handle.write("\n".join(sections))
    return output_file
def analyze_code_chunk(code: str) -> str:
    """
    Ask the LLM for a strict-JSON analysis of a single code chunk.

    Returns the model's raw text reply, which the prompt constrains to a
    JSON object with the keys 'strength', 'weaknesses', 'speciality' and
    'relevance rating'.
    """
    from openai import OpenAI

    # Endpoint and key are read from environment variables.
    llm = OpenAI(api_key=os.getenv("modal_api"))
    llm.base_url = os.getenv("base_url")
    chunk_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the following code chunk. "
        "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
        "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    completion = llm.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": chunk_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return completion.choices[0].message.content
def aggregate_chunk_analyses(chunk_jsons: list) -> str:
    """
    Merge per-chunk JSON analyses into one overall JSON summary via the LLM.

    *chunk_jsons* is a list of JSON strings produced by analyze_code_chunk.
    Returns the model's raw text reply, constrained by the prompt to a
    single JSON object with the same four keys as the chunk analyses.
    """
    from openai import OpenAI

    # Endpoint and key are read from environment variables.
    llm = OpenAI(api_key=os.getenv("modal_api"))
    llm.base_url = os.getenv("base_url")
    aggregation_prompt = (
        "You are a highly precise and strict, code analyzer and JSON generator. You are given a list of JSON analyses of code chunks. "
        "Aggregate these into a SINGLE overall JSON summary with the same keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
        "Summarize and combine the information from all chunks. Do NOT include any explanation, markdown, or text outside the JSON. "
        "If a key is missing in all chunks, use an empty string. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
    # Lower temperature than the per-chunk calls: aggregation should be stable.
    completion = llm.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": aggregation_prompt},
            {"role": "user", "content": user_content},
        ],
        max_tokens=512,
        temperature=0.3,
    )
    return completion.choices[0].message.content
def analyze_combined_file(output_file="combined_repo.txt"):
    """
    Chunk *output_file* into 500-line pieces, analyze each chunk with the
    LLM, then aggregate all per-chunk JSONs into a final summary.

    Returns a debug string containing every chunk's JSON followed by the
    aggregated summary; on any failure returns an error message string
    instead of raising.
    """
    chunk_size = 500
    try:
        with open(output_file, "r", encoding="utf-8") as handle:
            lines = handle.readlines()
        chunk_jsons = [
            analyze_code_chunk("".join(lines[start:start + chunk_size]))
            for start in range(0, len(lines), chunk_size)
        ]
        final_summary = aggregate_chunk_analyses(chunk_jsons)
        chunk_report = "\n\n".join(
            f"Chunk {index + 1} JSON:\n{chunk_json}"
            for index, chunk_json in enumerate(chunk_jsons)
        )
        return (
            "==== Chunk JSON Outputs ===="
            + chunk_report
            + "\n\n==== Final Aggregated Summary ===="
            + f"\n{final_summary}"
        )
    except Exception as e:
        return f"Error analyzing combined file: {e}"
|