Update analyzer.py
Browse files- analyzer.py +59 -6
analyzer.py
CHANGED
@@ -73,21 +73,74 @@ def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo
|
|
73 |
out_f.write("\n".join(combined_content))
|
74 |
return output_file
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
def analyze_combined_file(output_file="combined_repo.txt"):
|
77 |
"""
|
78 |
-
Reads the combined file, splits it into 500-line chunks, analyzes each chunk, and aggregates the LLM's output.
|
79 |
Returns the aggregated analysis as a string.
|
80 |
"""
|
81 |
try:
|
82 |
with open(output_file, "r", encoding="utf-8") as f:
|
83 |
lines = f.readlines()
|
84 |
chunk_size = 500
|
85 |
-
|
86 |
for i in range(0, len(lines), chunk_size):
|
87 |
chunk = "".join(lines[i:i+chunk_size])
|
88 |
-
analysis =
|
89 |
-
|
90 |
-
|
91 |
-
return
|
92 |
except Exception as e:
|
93 |
return f"Error analyzing combined file: {e}"
|
|
|
73 |
out_f.write("\n".join(combined_content))
|
74 |
return output_file
|
75 |
|
76 |
+
def analyze_code_chunk(code: str) -> str:
    """Analyze a single code chunk with the LLM and return its JSON summary.

    Args:
        code: Raw text of the code chunk to analyze.

    Returns:
        The model's response content — expected to be a JSON object string
        with keys 'strength', 'weaknesses', 'speciality', 'relevance rating'.
    """
    from openai import OpenAI

    # Pass base_url to the constructor instead of assigning client.base_url
    # afterwards: the constructor handles a missing/None "base_url" env var
    # gracefully, whereas post-hoc assignment does not.
    client = OpenAI(
        api_key=os.getenv("modal_api"),
        base_url=os.getenv("base_url"),
    )
    # NOTE(fix): the example output previously used single-quoted keys, which
    # is not valid JSON and contradicted the strict-JSON instruction; the
    # example now shows proper double-quoted JSON.
    chunk_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the following code chunk. "
        "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": chunk_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return response.choices[0].message.content
+
def aggregate_chunk_analyses(chunk_jsons: list) -> str:
    """Aggregate per-chunk JSON analyses into a single JSON summary via the LLM.

    Args:
        chunk_jsons: List of JSON strings, one per analyzed code chunk.

    Returns:
        A single JSON object string with keys 'strength', 'weaknesses',
        'speciality', 'relevance rating' combining all chunk analyses.
    """
    from openai import OpenAI

    # Pass base_url to the constructor instead of assigning client.base_url
    # afterwards: the constructor handles a missing/None "base_url" env var
    # gracefully, whereas post-hoc assignment does not.
    client = OpenAI(
        api_key=os.getenv("modal_api"),
        base_url=os.getenv("base_url"),
    )
    # NOTE(fix): the example output previously used single-quoted keys, which
    # is not valid JSON and contradicted the strict-JSON instruction; the
    # example now shows proper double-quoted JSON.
    aggregation_prompt = (
        "You are a highly precise and strict JSON generator. You are given a list of JSON analyses of code chunks. "
        "Aggregate these into a SINGLE overall JSON summary with the same keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Summarize and combine the information from all chunks. Do NOT include any explanation, markdown, or text outside the JSON. "
        "If a key is missing in all chunks, use an empty string. "
        "Example of the ONLY valid output:\n"
        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
    )
    user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": aggregation_prompt},
            {"role": "user", "content": user_content},
        ],
        max_tokens=512,
        temperature=0.3,
    )
    return response.choices[0].message.content
129 |
def analyze_combined_file(output_file="combined_repo.txt"):
    """Split the combined repo file into 500-line chunks, analyze each chunk,
    and aggregate the per-chunk analyses into one final JSON summary.

    Args:
        output_file: Path to the combined repository text file.

    Returns:
        The aggregated analysis string, or an error-message string if reading
        or analysis fails (errors are reported, never raised, to the caller).
    """
    try:
        with open(output_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
        chunk_size = 500
        chunk_jsons = []
        for i in range(0, len(lines), chunk_size):
            chunk = "".join(lines[i:i + chunk_size])
            chunk_jsons.append(analyze_code_chunk(chunk))
        # Guard: an empty input file yields no chunks — skip the pointless
        # aggregation LLM call and report the condition explicitly.
        if not chunk_jsons:
            return "Error analyzing combined file: no content to analyze"
        return aggregate_chunk_analyses(chunk_jsons)
    except Exception as e:
        return f"Error analyzing combined file: {e}"