naman1102 committed on
Commit
1c90111
·
1 Parent(s): 1d3eed5

Update analyzer.py

Browse files
Files changed (1) hide show
  1. analyzer.py +59 -6
analyzer.py CHANGED
@@ -73,21 +73,74 @@ def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo
73
  out_f.write("\n".join(combined_content))
74
  return output_file
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
def analyze_combined_file(output_file="combined_repo.txt"):
    """Analyze the combined repository file in 500-line chunks.

    Each chunk is handed to ``analyze_code`` and the per-chunk results are
    joined with a ``---`` separator. On any failure (missing file, LLM
    error, ...) an error-message string is returned instead of raising.

    Args:
        output_file: Path to the combined repository text file.

    Returns:
        The joined chunk analyses, or an error description string.
    """
    try:
        with open(output_file, "r", encoding="utf-8") as fh:
            all_lines = fh.readlines()
        step = 500
        # One analyze_code() call per 500-line slice of the file.
        results = [
            analyze_code("".join(all_lines[start:start + step]))
            for start in range(0, len(all_lines), step)
        ]
        # Optionally the per-chunk JSONs could be merged; for now they are
        # returned concatenated with a separator.
        return "\n---\n".join(results)
    except Exception as e:
        return f"Error analyzing combined file: {e}"
 
73
  out_f.write("\n".join(combined_content))
74
  return output_file
75
 
76
def analyze_code_chunk(code: str) -> str:
    """Analyze one chunk of source code with the LLM.

    Sends the chunk to the chat model under a strict system prompt and
    returns the raw response text, which is expected to be a JSON object
    with the keys 'strength', 'weaknesses', 'speciality' and
    'relevance rating'.

    Args:
        code: The code chunk to analyze.

    Returns:
        The model's response content (expected to be a JSON string).
    """
    from openai import OpenAI

    # NOTE(review): assumes the 'modal_api' and 'base_url' environment
    # variables are set by the deployment — confirm against the runtime config.
    client = OpenAI(api_key=os.getenv("modal_api"))
    client.base_url = os.getenv("base_url")
    chunk_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the following code chunk. "
        "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        # Fixed: the example previously used single-quoted keys/values,
        # which is NOT valid JSON and contradicted the instruction above.
        '{"strength": "...", "weaknesses": "...", "speciality": "...", "relevance rating": "..."}'
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": chunk_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        # Low temperature: deterministic, schema-conforming output is the
        # goal here, so sampling diversity (was 0.7) only hurts.
        temperature=0.2,
    )
    return response.choices[0].message.content
101
+
102
def aggregate_chunk_analyses(chunk_jsons: list) -> str:
    """Merge per-chunk JSON analyses into a single JSON summary via the LLM.

    Args:
        chunk_jsons: List of JSON strings, one per analyzed code chunk.

    Returns:
        The model's response content: a single JSON object with the keys
        'strength', 'weaknesses', 'speciality' and 'relevance rating'.
    """
    from openai import OpenAI

    # NOTE(review): assumes the 'modal_api' and 'base_url' environment
    # variables are set by the deployment — confirm against the runtime config.
    client = OpenAI(api_key=os.getenv("modal_api"))
    client.base_url = os.getenv("base_url")
    aggregation_prompt = (
        "You are a highly precise and strict JSON generator. You are given a list of JSON analyses of code chunks. "
        "Aggregate these into a SINGLE overall JSON summary with the same keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Summarize and combine the information from all chunks. Do NOT include any explanation, markdown, or text outside the JSON. "
        "If a key is missing in all chunks, use an empty string. "
        "Example of the ONLY valid output:\n"
        # Fixed: the example previously used single-quoted keys/values,
        # which is NOT valid JSON and contradicted the instruction above.
        '{"strength": "...", "weaknesses": "...", "speciality": "...", "relevance rating": "..."}'
    )
    user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": aggregation_prompt},
            {"role": "user", "content": user_content},
        ],
        max_tokens=512,
        temperature=0.3,
    )
    return response.choices[0].message.content
128
+
129
def analyze_combined_file(output_file="combined_repo.txt"):
    """Chunk-analyze the combined repository file and return one summary.

    The file is read in 500-line slices; each slice goes through
    ``analyze_code_chunk`` and the collected per-chunk JSONs are merged
    into a final summary by ``aggregate_chunk_analyses``. Any failure is
    reported as an error-message string rather than an exception.

    Args:
        output_file: Path to the combined repository text file.

    Returns:
        The aggregated JSON summary string, or an error description.
    """
    try:
        with open(output_file, "r", encoding="utf-8") as fh:
            all_lines = fh.readlines()
        step = 500
        # Analyze each 500-line slice independently, collecting the JSONs.
        per_chunk = [
            analyze_code_chunk("".join(all_lines[start:start + step]))
            for start in range(0, len(all_lines), step)
        ]
        # Second LLM pass: fold the per-chunk JSONs into one summary.
        return aggregate_chunk_analyses(per_chunk)
    except Exception as e:
        return f"Error analyzing combined file: {e}"