Update analyzer.py
Browse files- analyzer.py +59 -6
analyzer.py
CHANGED
@@ -73,21 +73,74 @@ def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo
|
|
73 |
out_f.write("\n".join(combined_content))
|
74 |
return output_file
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
def analyze_combined_file(output_file="combined_repo.txt"):
|
77 |
"""
|
78 |
-
Reads the combined file, splits it into 500-line chunks, analyzes each chunk, and aggregates the LLM's output.
|
79 |
Returns the aggregated analysis as a string.
|
80 |
"""
|
81 |
try:
|
82 |
with open(output_file, "r", encoding="utf-8") as f:
|
83 |
lines = f.readlines()
|
84 |
chunk_size = 500
|
85 |
-
|
86 |
for i in range(0, len(lines), chunk_size):
|
87 |
chunk = "".join(lines[i:i+chunk_size])
|
88 |
-
analysis =
|
89 |
-
|
90 |
-
|
91 |
-
return
|
92 |
except Exception as e:
|
93 |
return f"Error analyzing combined file: {e}"
|
|
|
73 |
out_f.write("\n".join(combined_content))
|
74 |
return output_file
|
75 |
|
76 |
+
def analyze_code_chunk(code: str) -> str:
    """Analyze a single code chunk with the LLM and return its JSON summary.

    Args:
        code: Raw text of the code chunk to analyze.

    Returns:
        The model's response content — expected to be a JSON object string
        with keys 'strength', 'weaknesses', 'speciality', 'relevance rating'.
    """
    from openai import OpenAI

    # Pass base_url to the constructor instead of assigning client.base_url
    # afterwards: the constructor handles a missing/None "base_url" env var
    # gracefully, whereas post-hoc assignment does not.
    client = OpenAI(
        api_key=os.getenv("modal_api"),
        base_url=os.getenv("base_url"),
    )
    # NOTE(fix): the example output previously used single-quoted keys, which
    # is not valid JSON and contradicted the strict-JSON instruction; the
    # example now shows proper double-quoted JSON.
    chunk_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the following code chunk. "
        "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": chunk_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return response.choices[0].message.content
+
def aggregate_chunk_analyses(chunk_jsons: list) -> str:
    """Aggregate per-chunk JSON analyses into a single JSON summary via the LLM.

    Args:
        chunk_jsons: List of JSON strings, one per analyzed code chunk.

    Returns:
        A single JSON object string with keys 'strength', 'weaknesses',
        'speciality', 'relevance rating' combining all chunk analyses.
    """
    from openai import OpenAI

    # Pass base_url to the constructor instead of assigning client.base_url
    # afterwards: the constructor handles a missing/None "base_url" env var
    # gracefully, whereas post-hoc assignment does not.
    client = OpenAI(
        api_key=os.getenv("modal_api"),
        base_url=os.getenv("base_url"),
    )
    # NOTE(fix): the example output previously used single-quoted keys, which
    # is not valid JSON and contradicted the strict-JSON instruction; the
    # example now shows proper double-quoted JSON.
    aggregation_prompt = (
        "You are a highly precise and strict JSON generator. You are given a list of JSON analyses of code chunks. "
        "Aggregate these into a SINGLE overall JSON summary with the same keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Summarize and combine the information from all chunks. Do NOT include any explanation, markdown, or text outside the JSON. "
        "If a key is missing in all chunks, use an empty string. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": aggregation_prompt},
            {"role": "user", "content": user_content},
        ],
        max_tokens=512,
        temperature=0.3,
    )
    return response.choices[0].message.content
129 |
def analyze_combined_file(output_file="combined_repo.txt"):
    """Split the combined repo file into 500-line chunks, analyze each chunk,
    and aggregate the per-chunk analyses into one final JSON summary.

    Args:
        output_file: Path to the combined repository text file.

    Returns:
        The aggregated analysis string, or an error-message string if reading
        or analysis fails (errors are reported, never raised, to the caller).
    """
    try:
        with open(output_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
        chunk_size = 500
        chunk_jsons = []
        for i in range(0, len(lines), chunk_size):
            chunk = "".join(lines[i:i + chunk_size])
            chunk_jsons.append(analyze_code_chunk(chunk))
        # Guard: an empty input file yields no chunks — skip the pointless
        # aggregation LLM call and report the condition explicitly.
        if not chunk_jsons:
            return "Error analyzing combined file: no content to analyze"
        return aggregate_chunk_analyses(chunk_jsons)
    except Exception as e:
        return f"Error analyzing combined file: {e}"