import openai  # OpenAI Python SDK; this module talks to the OpenAI API directly (no custom base_url)
import json
# Import the API key from our config loader
from utils.config_loader import OPENAI_API_KEY

# Initialize the OpenAI client
client = None
if OPENAI_API_KEY:
    try:
        client = openai.OpenAI(
            api_key=OPENAI_API_KEY
            # No base_url needed for direct OpenAI
        )
        print("OpenAI client initialized successfully in llm_handler.")
    except Exception as e:
        print(f"Error initializing OpenAI client in llm_handler: {e}")
        client = None
else:
    print("WARNING (llm_handler): OPENAI_API_KEY not configured. LLM calls will fail.")


def get_simple_issue_suggestion(
        issues_data: list[dict],
        language: str,
        target_count: int = 1,
        model_name: str = "gpt-4o-mini", # Or your preferred model
        additional_prompt_context: str = "" # Extra guidance appended to the system prompt
    ) -> str | None:
    """
    Sends issue data to the OpenAI API and asks which issue(s) might be best suited for a beginner.
    """
    if not client:
        print("LLM client (OpenAI) in get_simple_issue_suggestion is not initialized.")
        return "LLM client (OpenAI) not initialized. Check API Key configuration."
    if not issues_data:
        print("No issues provided to LLM for suggestion.")
        return "No issues provided to LLM for suggestion."

    prompt_issues_str = "" # Rebuild this based on your existing logic
    for i, issue in enumerate(issues_data):
        snippet = issue.get('body_snippet', 'No description available.')
        title = issue.get('title', 'No title')
        url = issue.get('html_url', '#')
        labels = ", ".join(issue.get('labels', [])) if issue.get('labels') else "No labels"
        prompt_issues_str += (
            f"\n--- Issue {i+1} ---\n"
            f"Title: {title}\nURL: {url}\nLabels: {labels}\nSnippet from body: {snippet}\n-----------------\n"
        )

    system_prompt = (
        "You are an expert assistant helping a new open-source contributor. "
        "Your task is to analyze the provided list of GitHub issues and recommend "
        f"the top {target_count} that would be most suitable for a beginner ideally in {language} (if specified and makes sense for the issues). "
        "Consider factors like clarity, labels, and apparent scope. "
        f"{additional_prompt_context}" # ADDED additional context here
        " If the user-specified language seems mismatched with the provided issues, please make your best judgment "
        "based on the issue content itself or note the potential mismatch in your recommendation."
    )
    user_prompt = (
        f"Here is a list of GitHub issues found when searching for the language '{language}'. "
        # Note: additional_prompt_context is injected via the system prompt above.
        f"Please review them and suggest the top {target_count} issue(s) that seem most suitable for a beginner. "
        f"For each suggested issue, provide a concise explanation (1-2 sentences) stating *why* it's a good choice for a beginner. "
        f"If you suggest an issue, please refer to it by its number (e.g., 'Issue 1')."
        f"\nHere are the issues:\n{prompt_issues_str}"
    )

    temperature_val = 0.4
    max_tokens_val = 200 + (target_count * 150)
    top_p_val = 0.9 # Nucleus-sampling cutoff; use 1.0 to rely on temperature alone

    print(f"\nSending request to OpenAI LLM for issue suggestion...")
    print(f"Model: {model_name}, Temp: {temperature_val}, MaxTokens: {max_tokens_val}")

    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temperature_val,
            max_tokens=max_tokens_val,
            top_p=top_p_val
        )

        suggestion_text = completion.choices[0].message.content
        print("OpenAI LLM Suggestion Received.")
        return suggestion_text.strip()

    except openai.APIConnectionError as e:
        print(f"OpenAI API Connection Error: {e}")
        return f"LLM suggestion failed due to connection error: {e}"
    except openai.RateLimitError as e: # Good to handle this explicitly
        print(f"OpenAI API Rate Limit Error: {e}")
        return f"LLM suggestion failed due to rate limit: {e}. Check your OpenAI plan and usage."
    except openai.AuthenticationError as e: # Added for bad API key
        print(f"OpenAI API Authentication Error: {e}. Check your OPENAI_API_KEY.")
        return f"LLM suggestion failed due to authentication error: {e}."
    except openai.APIStatusError as e:
        print(f"OpenAI API Status Error: Status {e.status_code} - Response: {e.response}")
        return f"LLM suggestion failed due to API status error: {e.status_code}"
    except Exception as e:
        print(f"LLM API call to OpenAI failed with an unexpected error: {e}")
        print(f"Type of error: {type(e)}")
        return f"LLM suggestion failed with an unexpected error: {e}"

# --- NEW FUNCTION 1: Summarize Text Content ---
def summarize_text_content(
        text_content: str,
        purpose: str = "contribution guidelines", # e.g., "issue description", "documentation section"
        max_summary_tokens: int = 200, # Adjust as needed
        model_name: str = "gpt-4o-mini" # Or your preferred model
    ) -> str | None:
    """
    Summarizes a given text content using an LLM.
    """
    if not client:
        print("ERROR (llm_handler.summarize_text_content): LLM client not initialized.")
        return "LLM Client not initialized. Cannot summarize."
    if not text_content or not text_content.strip():
        print("Warning (llm_handler.summarize_text_content): No text content provided to summarize.")
        return "No content provided for summarization."

    # Heuristic: If text is already short, just return it or a small part.
    # This avoids wasting API calls on tiny texts. (Count words approx)
    if len(text_content.split()) < 75:  # Arbitrary threshold for "short" content
        print("Info (llm_handler.summarize_text_content): Content too short, returning as is or snippet.")
        if len(text_content) > 500:
            return f"The {purpose} document is brief: \"{text_content[:500]}...\""
        return text_content


    system_prompt = (
        f"You are an expert summarizer. Your task is to provide a concise summary of the following '{purpose}' document. "
        "Focus on the most critical information a new contributor would need. "
        "For contribution guidelines, highlight key setup steps, coding style conventions, testing requirements, and pull request procedures. "
        "Keep the summary brief and actionable."
    )
    user_prompt = (
        f"Please summarize the key points of the following {purpose} document:\n\n"
        f"```text\n{text_content[:8000]}\n```" # Limit context sent to LLM
        # Using 8000 characters as a rough limit to fit within context windows & manage cost.
        # Adjust this based on typical CONTRIBUTING.md length and model context limits.
    )

    print(f"LLM Handler: Sending request to summarize {purpose}. Model: {model_name}")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            temperature=0.2, # Lower temperature for factual summarization
            max_tokens=max_summary_tokens,
            top_p=1.0
        )
        summary_text = completion.choices[0].message.content
        print(f"LLM Handler: Summary for {purpose} received.")
        return summary_text.strip()
    except Exception as e:
        print(f"ERROR (llm_handler.summarize_text_content): LLM API call failed: {e}")
        return f"Could not summarize the {purpose}: LLM API error."

# --- NEW FUNCTION 2: Suggest Relevant Code Locations ---
def suggest_relevant_code_locations(
        issue_snippet: str,
        file_list: list[str],
        language: str, # Language of the project
        max_suggestion_tokens: int = 200, # Adjust as needed
        model_name: str = "gpt-4o-mini" # Or your preferred model
    ) -> str | None:
    """
    Suggests relevant files/folders based on an issue snippet and a list of files.
    """
    if not client:
        print("ERROR (llm_handler.suggest_relevant_code_locations): LLM client not initialized.")
        return "LLM Client not initialized. Cannot suggest locations."
    if not issue_snippet or not issue_snippet.strip():
        return "No issue description provided to suggest locations."
    if not file_list:
        return "No file list provided to suggest locations from."

    # Format file list for the prompt
    formatted_file_list = "\n".join([f"- `{f}`" for f in file_list])
    if not formatted_file_list: # Should not happen if file_list is not empty
        formatted_file_list = "No files listed."

    system_prompt = (
        f"You are an AI assistant helping a software developer navigate a new '{language}' codebase. "
        "Your goal is to identify potentially relevant files or folders for a given issue, based on a provided list of top-level project files/folders."
    )
    user_prompt = (
        f"A developer is starting work on an issue with the following description snippet:\n"
        f"'''\n{issue_snippet}\n'''\n\n"
        f"The top-level files and folders available in the repository are:\n"
        f"{formatted_file_list}\n\n"
        f"Based *only* on the issue snippet and this file list, please suggest 2-3 files or folders that might be most relevant for investigating this issue. "
        f"For each suggestion, provide a brief (1-sentence) explanation of why it might be relevant. "
        f"If no files seem obviously relevant from the top-level list, say so."
    )

    print(f"LLM Handler: Sending request to suggest relevant code locations. Model: {model_name}")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            temperature=0.5, # Moderate temperature for some reasoning
            max_tokens=max_suggestion_tokens,
            top_p=1.0
        )
        suggestion_text = completion.choices[0].message.content
        print("LLM Handler: Code location suggestions received.")
        return suggestion_text.strip()
    except Exception as e:
        print(f"ERROR (llm_handler.suggest_relevant_code_locations): LLM API call failed: {e}")
        return f"Could not suggest code locations: LLM API error."

def plan_onboarding_kit_components(
        issue_data: dict,
        language_searched: str,
        model_name: str = "gpt-4.1-mini" # Or your preferred model
    ) -> dict | None:
    """
    Uses an LLM to decide which onboarding kit components are most relevant for a given issue.
    Returns a dictionary based on the LLM's JSON output.
    """
    if not client:
        print("ERROR (llm_handler.plan_kit): LLM client not initialized.")
        return None # Or: {"error": "LLM Client not initialized"}
    if not issue_data:
        print("ERROR (llm_handler.plan_kit): No issue data provided for planning.")
        return None # Or: {"error": "No issue data"}

    issue_title = issue_data.get("title", "N/A")
    issue_snippet = issue_data.get("body_snippet", "No description available.")
    issue_labels = issue_data.get("labels", [])

    # Define available kit components for the LLM to choose from
    available_components = [
        "repo_details_and_clone_command",      # Basic repo info, clone command
        "contribution_guidelines_link",        # Link to CONTRIBUTING.md
        "contribution_guidelines_summary_ai",  # AI Summary of CONTRIBUTING.md
        "repository_structure_modal_ai",       # File listing via Modal + AI suggested files
        # We could break down "repository_structure_modal_ai" further if needed:
        # "repository_files_modal_raw_list",
        # "ai_suggested_start_files_from_list"
    ]
    components_description = (
        "- repo_details_and_clone_command: Basic repository information and git clone command.\n"
        "- contribution_guidelines_link: A direct link to the project's CONTRIBUTING.md file (if found).\n"
        "- contribution_guidelines_summary_ai: An AI-generated summary of the key points from CONTRIBUTING.md.\n"
        "- repository_structure_modal_ai: A top-level file/folder listing from a repository clone (via Modal), followed by AI suggestions for relevant files based on the issue."
    )

    system_prompt = (
        "You are an expert onboarding assistant for open-source contributors. Your task is to intelligently plan "
        "the components of an onboarding kit that would be most helpful for a developer tackling a specific GitHub issue. "
        "You must respond ONLY with a valid JSON object containing a single key 'include_components' whose value is a list of strings, "
        "where each string is one of the component names provided."
    )
    user_prompt = (
        f"Based on the following GitHub issue details for a project searched under the language context '{language_searched}':\n"
        f"Issue Title: \"{issue_title}\"\n"
        f"Issue Snippet: \"{issue_snippet}\"\n"
        f"Issue Labels: {issue_labels}\n\n"
        f"And considering the following available onboarding kit components and their descriptions:\n"
        f"{components_description}\n\n"
        f"Which components should be included in the onboarding kit for this specific issue to be most helpful? "
        f"For example, if the issue is a very simple documentation typo, a full 'repository_structure_modal_ai' might be overkill. "
        f"If no contribution guidelines are typically found for a project, 'contribution_guidelines_summary_ai' would not be applicable. (You don't know this yet, but keep it in mind for general reasoning). "
        f"Prioritize helpfulness for a beginner. Respond ONLY with a JSON object in the format: "
        f"{{\"include_components\": [\"component_name_1\", \"component_name_2\", ...]}}"
    )

    print(f"LLM Handler (plan_kit): Sending request to plan kit components. Model: {model_name}")
    try:
        # Forcing JSON response mode if available and model supports it well
        # gpt-4o-mini and newer gpt-3.5-turbo models usually handle "Respond ONLY with a valid JSON" well.
        # For stronger enforcement, you can use response_format={"type": "json_object"} with compatible models.
        completion_params = {
            "model": model_name,
            "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            "temperature": 0.2, # Low temperature for more deterministic structural output
            "max_tokens": 200, # JSON output should be relatively small
            "top_p": 1.0,
        }
        # Enable explicit JSON mode via response_format for models known to support it
        if ("gpt-4o" in model_name or "gpt-4.1" in model_name
                or "gpt-3.5-turbo-0125" in model_name or "gpt-3.5-turbo-1106" in model_name):
            completion_params["response_format"] = {"type": "json_object"}


        completion = client.chat.completions.create(**completion_params)
        
        raw_response_content = completion.choices[0].message.content
        print(f"LLM Handler (plan_kit): Raw JSON response received: {raw_response_content}")

        # Attempt to parse the JSON
        parsed_plan = json.loads(raw_response_content)
        if "include_components" in parsed_plan and isinstance(parsed_plan["include_components"], list):
            # Further validation: ensure all component names are valid (optional but good)
            valid_components = [comp for comp in parsed_plan["include_components"] if comp in available_components]
            if len(valid_components) != len(parsed_plan["include_components"]):
                print("Warning (llm_handler.plan_kit): LLM returned some invalid component names.")
            
            final_plan = {"include_components": valid_components}
            print(f"LLM Handler (plan_kit): Parsed plan: {final_plan}")
            return final_plan
        else:
            print("ERROR (llm_handler.plan_kit): LLM response was not in the expected JSON format (missing 'include_components' list).")
            return {"error": "LLM response format error", "details": "Missing 'include_components' list."}

    except json.JSONDecodeError as json_e:
        print(f"ERROR (llm_handler.plan_kit): Failed to decode JSON from LLM response. Error: {json_e}. Response was: {raw_response_content}")
        return {"error": "JSON decode error", "details": str(json_e), "raw_response": raw_response_content}
    except Exception as e:
        print(f"ERROR (llm_handler.plan_kit): LLM API call failed: {e}")
        return {"error": f"LLM API call failed: {str(e)}"}