import json
import os

from openai import OpenAI

def analyze_code(code: str) -> str:
    """
    Uses OpenAI's GPT-4.1 mini model to analyze the given code.
    Returns the analysis as a string.
    """
    client = OpenAI(
        api_key=os.getenv("modal_api"),
        base_url=os.getenv("base_url"),
    )
    system_prompt = (
        "You are a highly precise and strict JSON generator. Analyze the code given to you. "
        "Your ONLY output must be a valid JSON object with the following keys: 'strength', 'weaknesses', 'speciality', 'relevance rating'. "
        "Do NOT include any explanation, markdown, or text outside the JSON. Do NOT add any commentary, preamble, or postscript. "
        "If you cannot answer, still return a valid JSON with empty strings for each key. "
        "Example of the ONLY valid output:\n"
        "{\n  'strength': '...', \n  'weaknesses': '...', \n  'speciality': '...', \n  'relevance rating': '...'\n}"
    )
    response = client.chat.completions.create(
        model="Orion-zhen/Qwen2.5-Coder-7B-Instruct-AWQ",  # Updated model
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": code}
        ],
        max_tokens=512,
        temperature=0.7
    )
    return response.choices[0].message.content

def parse_llm_json_response(response: str):
    """Parses the model's JSON reply, tolerating stray markdown fences."""
    try:
        # Models sometimes wrap the object in ``` fences despite instructions.
        cleaned = response.strip().strip("`")
        if cleaned.startswith("json"):
            cleaned = cleaned[4:]
        return json.loads(cleaned)
    except Exception as e:
        return {"error": f"Failed to parse JSON: {e}", "raw": response}

def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
    """
    Combines all .py and .md files in the given directory (recursively) into a single text file.
    Returns the path to the combined file.
    """
    combined_content = []
    seen_files = set()
    # Priority files
    priority_files = ["app.py", "README.md"]
    for pf in priority_files:
        pf_path = os.path.join(repo_dir, pf)
        if os.path.isfile(pf_path):
            try:
                with open(pf_path, "r", encoding="utf-8") as f:
                    combined_content.append(f"\n# ===== File: {pf} =====\n")
                    combined_content.append(f.read())
                seen_files.add(os.path.abspath(pf_path))
            except Exception as e:
                combined_content.append(f"\n# Could not read {pf_path}: {e}\n")
    # All other .py and .md files
    for root, _, files in os.walk(repo_dir):
        for file in files:
            if file.endswith(".py") or file.endswith(".md"):
                file_path = os.path.join(root, file)
                abs_path = os.path.abspath(file_path)
                if abs_path in seen_files:
                    continue
                try:
                    # Use the path relative to repo_dir so files with the same
                    # basename in different subdirectories stay distinguishable.
                    rel_path = os.path.relpath(file_path, repo_dir)
                    with open(file_path, "r", encoding="utf-8") as f:
                        combined_content.append(f"\n# ===== File: {rel_path} =====\n")
                        combined_content.append(f.read())
                    seen_files.add(abs_path)
                except Exception as e:
                    combined_content.append(f"\n# Could not read {file_path}: {e}\n")
    with open(output_file, "w", encoding="utf-8") as out_f:
        out_f.write("\n".join(combined_content))
    return output_file

def analyze_combined_file(output_file="combined_repo.txt"):
    """
    Reads the combined file, splits it into 500-line chunks, analyzes each chunk, and aggregates the LLM's output.
    Returns the aggregated analysis as a string.
    """
    try:
        with open(output_file, "r", encoding="utf-8") as f:
            lines = f.readlines()
        chunk_size = 500
        analyses = []
        for i in range(0, len(lines), chunk_size):
            chunk = "".join(lines[i:i+chunk_size])
            analysis = analyze_code(chunk)
            analyses.append(analysis)
        # Optionally, you could merge the JSONs here, but for now, return all analyses as a list
        return "\n---\n".join(analyses)
    except Exception as e:
        return f"Error analyzing combined file: {e}"