Spaces:
Sleeping
Sleeping
Update analyzer.py
Browse files- analyzer.py +9 -5
analyzer.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import openai
|
| 2 |
import os
|
| 3 |
import json
|
|
|
|
| 4 |
|
| 5 |
def analyze_code(code: str) -> str:
|
| 6 |
"""
|
|
@@ -39,10 +40,11 @@ def parse_llm_json_response(response: str):
|
|
| 39 |
json_str = response[start:end+1]
|
| 40 |
else:
|
| 41 |
json_str = response
|
| 42 |
-
|
|
|
|
| 43 |
return json.loads(json_str)
|
| 44 |
except Exception as e:
|
| 45 |
-
print("DEBUGGGGG error ::: ",e)
|
| 46 |
return {"error": f"Failed to parse JSON: {e}", "raw": response}
|
| 47 |
|
| 48 |
def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
|
|
@@ -93,10 +95,11 @@ def analyze_code_chunk(code: str) -> str:
|
|
| 93 |
chunk_prompt = (
|
| 94 |
"You are a highly precise and strict JSON generator. Analyze the following code chunk. "
|
| 95 |
"Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
|
|
|
|
| 96 |
"Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
|
| 97 |
"If you cannot answer, still return a valid JSON with empty strings for each key. "
|
| 98 |
"Example of the ONLY valid output:\n"
|
| 99 |
-
|
| 100 |
)
|
| 101 |
response = client.chat.completions.create(
|
| 102 |
model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
|
|
@@ -117,12 +120,13 @@ def aggregate_chunk_analyses(chunk_jsons: list) -> str:
|
|
| 117 |
client = OpenAI(api_key=os.getenv("modal_api"))
|
| 118 |
client.base_url = os.getenv("base_url")
|
| 119 |
aggregation_prompt = (
|
| 120 |
-
"You are a highly precise and strict, code analyzer and
|
| 121 |
"Aggregate these into a SINGLE overall JSON summary with the same keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
|
|
|
|
| 122 |
"Summarize and combine the information from all chunks. Do NOT include any explanation, markdown, or text outside the JSON. "
|
| 123 |
"If a key is missing in all chunks, use an empty string. "
|
| 124 |
"Example of the ONLY valid output:\n"
|
| 125 |
-
|
| 126 |
)
|
| 127 |
user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
|
| 128 |
response = client.chat.completions.create(
|
|
|
|
| 1 |
import openai
|
| 2 |
import os
|
| 3 |
import json
|
| 4 |
+
import re
|
| 5 |
|
| 6 |
def analyze_code(code: str) -> str:
|
| 7 |
"""
|
|
|
|
| 40 |
json_str = response[start:end+1]
|
| 41 |
else:
|
| 42 |
json_str = response
|
| 43 |
+
# Replace single quotes with double quotes for JSON keys/values
|
| 44 |
+
json_str = re.sub(r"(?<!\\)'", '"', json_str)
|
| 45 |
return json.loads(json_str)
|
| 46 |
except Exception as e:
|
| 47 |
+
print("DEBUGGGGG error ::: ", e)
|
| 48 |
return {"error": f"Failed to parse JSON: {e}", "raw": response}
|
| 49 |
|
| 50 |
def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
|
|
|
|
| 95 |
chunk_prompt = (
|
| 96 |
"You are a highly precise and strict JSON generator. Analyze the following code chunk. "
|
| 97 |
"Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
|
| 98 |
+
"All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
|
| 99 |
"Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
|
| 100 |
"If you cannot answer, still return a valid JSON with empty strings for each key. "
|
| 101 |
"Example of the ONLY valid output:\n"
|
| 102 |
+
'{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
|
| 103 |
)
|
| 104 |
response = client.chat.completions.create(
|
| 105 |
model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",
|
|
|
|
| 120 |
client = OpenAI(api_key=os.getenv("modal_api"))
|
| 121 |
client.base_url = os.getenv("base_url")
|
| 122 |
aggregation_prompt = (
|
| 123 |
+
"You are a highly precise and strict, code analyzer and JSON generator. You are given a list of JSON analyses of code chunks. "
|
| 124 |
"Aggregate these into a SINGLE overall JSON summary with the same keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
|
| 125 |
+
"All property names and string values MUST use double quotes (\"). Do NOT use single quotes. "
|
| 126 |
"Summarize and combine the information from all chunks. Do NOT include any explanation, markdown, or text outside the JSON. "
|
| 127 |
"If a key is missing in all chunks, use an empty string. "
|
| 128 |
"Example of the ONLY valid output:\n"
|
| 129 |
+
'{\n "strength": "...", \n "weaknesses": "...", \n "speciality": "...", \n "relevance rating": "..."\n}'
|
| 130 |
)
|
| 131 |
user_content = "Here are the chunk analyses:\n" + "\n".join(chunk_jsons)
|
| 132 |
response = client.chat.completions.create(
|