Spaces:

Agents-MCP-Hackathon
/

HF_RepoSense

Sleeping

App Files Files Community

HF_RepoSense / analyzer.py

naman1102

Update analyzer.py

f03a154 19 days ago

raw

history blame

7.67 kB

	import openai
	import os
	import json
	import re

	def analyze_code(code: str) -> str:
	    """
	    Ask the configured chat model for a JSON analysis of ``code``.

	    The request goes through the OpenAI client, authenticated with the
	    ``modal_api`` environment variable and pointed at the endpoint named by
	    the ``base_url`` environment variable. The model used is
	    ``Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ``.

	    Args:
	        code: Source text to analyze; sent verbatim as the user message.

	    Returns:
	        The raw message content of the first completion choice. The prompt
	        asks for a JSON object with the keys 'strength', 'weaknesses',
	        'speciality' and 'relevance rating', but the text is not validated
	        here.
	    """
	    from openai import OpenAI

	    client = OpenAI(api_key=os.getenv("modal_api"))
	    client.base_url = os.getenv("base_url")
	    # The example object is written with double quotes so that the model is
	    # shown real JSON; a single-quoted example contradicts the "valid JSON"
	    # instruction. The wording matches the prompt used for chunk analysis.
	    system_prompt = (
	        "You are a highly precise and strict JSON generator. Analyze the code given to you. "
	        "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
	        "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
	        "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
	        "If you cannot answer, still return a valid JSON with empty strings for each key. "
	        "Example of the ONLY valid output:\n"
	        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
	    )
	    response = client.chat.completions.create(
	        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
	        messages=[
	            {"role": "system", "content": system_prompt},
	            {"role": "user", "content": code},
	        ],
	        max_tokens=512,
	        temperature=0.7,
	    )
	    return response.choices[0].message.content

	def parse_llm_json_response(response: str):
	    """
	    Extract and decode the JSON object embedded in an LLM reply.

	    The text between the first '{' and the last '}' is taken as the
	    candidate (the whole reply when no such span exists) and decoded with
	    progressively more lenient strategies:

	    1. strict JSON;
	    2. a Python literal (handles single-quoted keys/values without
	       damaging apostrophes inside strings);
	    3. blanket replacement of unescaped single quotes with double quotes,
	       then strict JSON again.

	    Args:
	        response: Raw text returned by the model.

	    Returns:
	        The decoded value on success. On any failure, a dict with the keys
	        ``"error"`` (description) and ``"raw"`` (the original ``response``).
	    """
	    try:
	        print("DEBUGGGGG ::: ", response)
	        # Keep only the substring between the first '{' and the last '}'.
	        start = response.find('{')
	        end = response.rfind('}')
	        if start != -1 and end > start:
	            json_str = response[start:end + 1]
	        else:
	            json_str = response

	        # Already-valid JSON must be accepted untouched: rewriting quotes
	        # first would break values that contain an apostrophe.
	        try:
	            return json.loads(json_str)
	        except json.JSONDecodeError:
	            pass

	        # Single-quoted output is usually a valid Python literal.
	        import ast
	        try:
	            literal = ast.literal_eval(json_str)
	        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	            literal = None
	        if isinstance(literal, (dict, list)):
	            return literal

	        # Last resort: replace single quotes with double quotes.
	        json_str = re.sub(r"(?<!\\)'", '"', json_str)
	        return json.loads(json_str)
	    except Exception as e:
	        print("DEBUGGGGG error ::: ", e)
	        return {"error": f"Failed to parse JSON: {e}", "raw": response}

	def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
	    """
	    Combine all .py and .md files under ``repo_dir`` (recursively) into one text file.

	    ``app.py`` and ``README.md`` at the top of ``repo_dir`` are written first,
	    in that order. Every other matching file follows in sorted traversal
	    order, so the result does not depend on filesystem listing order. Each
	    file is preceded by a ``# ===== File: <path> =====`` banner, where
	    ``<path>`` is relative to ``repo_dir`` so that same-named files in
	    different directories stay distinguishable. Unreadable files are
	    replaced by a ``# Could not read ...`` note instead of aborting.

	    Args:
	        repo_dir: Directory to scan.
	        output_file: Path of the combined file to write (UTF-8).

	    Returns:
	        ``output_file``, unchanged.
	    """
	    combined_content = []
	    # Seed with the output path so a combined file left inside repo_dir by
	    # an earlier run is never fed back into itself.
	    seen_files = {os.path.abspath(output_file)}

	    def _append_file(path, label):
	        # Add one banner + body pair, or a note when the file cannot be read.
	        try:
	            with open(path, "r", encoding="utf-8") as f:
	                text = f.read()
	        except (OSError, UnicodeError) as e:
	            combined_content.append(f"\n# Could not read {path}: {e}\n")
	            return
	        combined_content.append(f"\n# ===== File: {label} =====\n")
	        combined_content.append(text)

	    # Priority files
	    for pf in ("app.py", "README.md"):
	        pf_path = os.path.join(repo_dir, pf)
	        if os.path.isfile(pf_path):
	            # Marked as seen even when unreadable, so the walk below does
	            # not report the same failure a second time.
	            seen_files.add(os.path.abspath(pf_path))
	            _append_file(pf_path, pf)

	    # All other .py and .md files
	    for root, dirs, files in os.walk(repo_dir):
	        dirs.sort()  # in-place sort fixes the order os.walk descends in
	        for file in sorted(files):
	            if not file.endswith((".py", ".md")):
	                continue
	            file_path = os.path.join(root, file)
	            abs_path = os.path.abspath(file_path)
	            if abs_path in seen_files:
	                continue
	            seen_files.add(abs_path)
	            _append_file(file_path, os.path.relpath(file_path, repo_dir))

	    with open(output_file, "w", encoding="utf-8") as out_f:
	        out_f.write("\n".join(combined_content))
	    return output_file

	def analyze_code_chunk(code: str) -> str:
	    """
	    Request a JSON summary for a single chunk of code.

	    The chunk is sent as the user message to the
	    ``Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ`` model through the OpenAI
	    client, configured from the ``modal_api`` and ``base_url`` environment
	    variables.

	    Args:
	        code: The chunk text to analyze.

	    Returns:
	        The raw message content of the first completion choice.
	    """
	    from openai import OpenAI

	    llm = OpenAI(api_key=os.getenv("modal_api"))
	    llm.base_url = os.getenv("base_url")

	    # System instructions, assembled from their individual sentences.
	    instructions = "".join([
	        "You are a highly precise and strict JSON generator. Analyze the following code chunk. ",
	        "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. ",
	        "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. ",
	        "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. ",
	        "If you cannot answer, still return a valid JSON with empty strings for each key. ",
	        "Example of the ONLY valid output:\n",
	        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}',
	    ])
	    conversation = [
	        {"role": "system", "content": instructions},
	        {"role": "user", "content": code},
	    ]

	    completion = llm.chat.completions.create(
	        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
	        messages=conversation,
	        max_tokens=512,
	        temperature=0.7,
	    )
	    first_choice = completion.choices[0]
	    return first_choice.message.content

	def aggregate_chunk_analyses(chunk_jsons: list) -> str:
	    """
	    Merge per-chunk JSON analyses into one overall summary via the LLM.

	    The chunk analyses are joined with newlines and sent as the user message
	    to the ``Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ`` model through the
	    OpenAI client, configured from the ``modal_api`` and ``base_url``
	    environment variables.

	    Args:
	        chunk_jsons: The analysis strings produced for the individual chunks.

	    Returns:
	        The raw message content of the first completion choice.
	    """
	    from openai import OpenAI

	    llm = OpenAI(api_key=os.getenv("modal_api"))
	    llm.base_url = os.getenv("base_url")

	    # System instructions, assembled from their individual sentences.
	    instructions = "".join([
	        "You are a highly precise and strict, code analyzer and JSON generator. You are given a list of JSON analyses of code chunks. ",
	        "Aggregate these into a SINGLE overall JSON summary with the same keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. ",
	        "All property names and string values MUST use double quotes (\"). Do NOT use single quotes. ",
	        "Summarize and combine the information from all chunks. Do NOT include any explanation, markdown, or text outside the JSON. ",
	        "If a key is missing in all chunks, use an empty string. ",
	        "Example of the ONLY valid output:\n",
	        '{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}',
	    ])

	    joined_analyses = "\n".join(chunk_jsons)
	    conversation = [
	        {"role": "system", "content": instructions},
	        {"role": "user", "content": f"Here are the chunk analyses:\n{joined_analyses}"},
	    ]

	    completion = llm.chat.completions.create(
	        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
	        messages=conversation,
	        max_tokens=512,
	        temperature=0.3,
	    )
	    first_choice = completion.choices[0]
	    return first_choice.message.content

	def analyze_combined_file(output_file="combined_repo.txt"):
	    """
	    Analyze the combined repository file chunk by chunk and build a report.

	    The file is read as UTF-8 and split into consecutive 500-line chunks.
	    Each chunk is passed to ``analyze_code_chunk``, and the collected
	    results are passed to ``aggregate_chunk_analyses``.

	    Args:
	        output_file: Path of the combined file to read.

	    Returns:
	        A single string holding a "Chunk JSON Outputs" section (one entry per
	        chunk, numbered from 1) followed by a "Final Aggregated Summary"
	        section. If anything raises, the string
	        ``"Error analyzing combined file: <exception>"`` is returned instead.
	    """
	    chunk_size = 500
	    try:
	        with open(output_file, "r", encoding="utf-8") as f:
	            lines = f.readlines()

	        chunk_jsons = [
	            analyze_code_chunk("".join(lines[start:start + chunk_size]))
	            for start in range(0, len(lines), chunk_size)
	        ]
	        final_summary = aggregate_chunk_analyses(chunk_jsons)

	        chunk_report = "\n\n".join(
	            f"Chunk {number} JSON:\n{chunk_json}"
	            for number, chunk_json in enumerate(chunk_jsons, start=1)
	        )
	        # The newline after the first banner keeps it from running straight
	        # into "Chunk 1 JSON:" on the same line.
	        return (
	            "==== Chunk JSON Outputs ====\n"
	            + chunk_report
	            + "\n\n==== Final Aggregated Summary ===="
	            + f"\n{final_summary}"
	        )
	    except Exception as e:
	        # Callers receive a string either way, so failures are reported
	        # in-band rather than raised.
	        return f"Error analyzing combined file: {e}"