import json
import os

from openai import OpenAI
def analyze_code(code: str) -> str:
    """
    Sends the given code to an OpenAI-compatible chat endpoint running
    Qwen2.5-Coder-7B-Instruct-AWQ and returns the model's analysis as a
    string (expected to be a JSON object).
    The API key and endpoint are read from the "modal_api" and "base_url"
    environment variables.
    """
    client = OpenAI(
        api_key=os.getenv("modal_api"),
        base_url=os.getenv("base_url"),
    )
    system_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the code given to you. "
        "Your ONLY output must be a valid JSON object with the following keys: "
        "'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Do NOT include any explanation, markdown, or text outside the JSON. "
        "Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        '{\n  "strength": "...",\n  "weaknesses": "...",\n  "speciality": "...",\n  "relevance rating": "..."\n}'
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": code},
        ],
        max_tokens=512,
        temperature=0.7,
    )
    return response.choices[0].message.content
def parse_llm_json_response(response: str):
    """Parses an LLM response as JSON, returning an error dict on failure."""
    try:
        return json.loads(response)
    except Exception as e:
        return {"error": f"Failed to parse JSON: {e}", "raw": response}
def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
    """
    Combines all .py and .md files in the given directory (recursively) into a single text file.
    Returns the path to the combined file.
    """
    combined_content = []
    seen_files = set()
    # Priority files: the entry point and README go first
    priority_files = ["app.py", "README.md"]
    for pf in priority_files:
        pf_path = os.path.join(repo_dir, pf)
        if os.path.isfile(pf_path):
            try:
                with open(pf_path, "r", encoding="utf-8") as f:
                    combined_content.append(f"\n# ===== File: {pf} =====\n")
                    combined_content.append(f.read())
                seen_files.add(os.path.abspath(pf_path))
            except Exception as e:
                combined_content.append(f"\n# Could not read {pf_path}: {e}\n")
    # All other .py and .md files
    for root, _, files in os.walk(repo_dir):
        for file in files:
            if file.endswith(".py") or file.endswith(".md"):
                file_path = os.path.join(root, file)
                abs_path = os.path.abspath(file_path)
                if abs_path in seen_files:
                    continue
                try:
                    with open(file_path, "r", encoding="utf-8") as f:
                        combined_content.append(f"\n# ===== File: {file} =====\n")
                        combined_content.append(f.read())
                    seen_files.add(abs_path)
                except Exception as e:
                    combined_content.append(f"\n# Could not read {file_path}: {e}\n")
    with open(output_file, "w", encoding="utf-8") as out_f:
        out_f.write("\n".join(combined_content))
    return output_file
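
# For reference, the combined file produced above interleaves marker lines
# and file bodies, e.g. (illustrative):
#
#   # ===== File: app.py =====
#   <contents of app.py>
#
#   # ===== File: README.md =====
#   <contents of README.md>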
def analyze_combined_file(output_file="combined_repo.txt"):
    """
    Reads the combined file, splits it into 500-line chunks, analyzes each chunk,
    and aggregates the LLM's output.
    Returns the aggregated analysis as a string.
    """
    try:
        with open(output_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
        chunk_size = 500
        analyses = []
        for i in range(0, len(lines), chunk_size):
            chunk = "".join(lines[i:i + chunk_size])
            analysis = analyze_code(chunk)
            analyses.append(analysis)
        # The per-chunk JSONs could be merged here; for now, all analyses are
        # returned joined by a separator.
        return "\n---\n".join(analyses)
    except Exception as e:
        return f"Error analyzing combined file: {e}"