added all files
Browse files- README.md +69 -14
- core/__init__.py +0 -0
- core/github_client.py +268 -0
- core/kit_generator.py +244 -0
- core/llm_handler.py +317 -0
- core/modal_processor.py +51 -0
- modal_definitions.py +78 -0
- requirements.txt +5 -0
- run_day1_tests.py +66 -0
- utils/config_loader.py +19 -0
README.md
CHANGED
@@ -1,14 +1,69 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# ContribNavigator
|
2 |
+
|
3 |
+
ContribNavigator is an AI-assisted tool designed to help developers, especially newcomers, find and begin contributing to open-source projects on GitHub. It streamlines issue discovery and provides an intelligently generated onboarding kit.
|
4 |
+
|
5 |
+
## Core Features
|
6 |
+
|
7 |
+
* **Targeted Issue Search:** Filters GitHub issues by programming language and user-specified topics (e.g., "python" + "machine-learning"), prioritizing beginner-friendly labels like "good first issue."
|
8 |
+
* **AI-Powered Issue Suggestion:** Leverages an LLM (OpenAI GPT-4o class) to analyze fetched issues and recommend the most suitable starting point for a beginner, with clear reasoning.
|
9 |
+
* **Intelligent Onboarding Kit:**
|
10 |
+
* **AI-Planned Components:** An LLM planner dynamically determines the most relevant sections to include in the kit based on the selected issue.
|
11 |
+
* **Essential Information:** Direct links to the issue and repository, `git clone` command.
|
12 |
+
* **Contribution Guidelines Analysis:**
|
13 |
+
* Link to the project's `CONTRIBUTING.md` (or similar).
|
14 |
+
* AI-generated summary of key contribution procedures (setup, coding style, PR process).
|
15 |
+
* **Repository Overview (via Modal):**
|
16 |
+
* Top-level file and directory listing from a sandboxed `git clone` operation executed on Modal.
|
17 |
+
* AI-suggested relevant files/folders to investigate for the specific issue.
|
18 |
+
* **First Steps Checklist:** An interactive checklist in the UI to guide users through their initial contribution actions.
|
19 |
+
* **Agentic Design:** Built with an agentic architecture where an LLM plans and orchestrates the use of internal "tools" (GitHub API client, Modal repo inspector, LLM analysis functions), aligning with Model Context Protocol (MCP) principles.
|
20 |
+
|
21 |
+
## Tech Stack
|
22 |
+
|
23 |
+
* **UI:** Gradio
|
24 |
+
* **Backend & Orchestration:** Python
|
25 |
+
* **Language Models:** OpenAI API (GPT-4o class)
|
26 |
+
* **Sandboxed Operations:** Modal (for `git clone`)
|
27 |
+
* **Data Source:** GitHub API
|
28 |
+
|
29 |
+
## Local Development Setup
|
30 |
+
|
31 |
+
1. **Prerequisites:**
|
32 |
+
* Git
|
33 |
+
* Python (3.10+)
|
34 |
+
* Modal Account & CLI (`modal token new`)
|
35 |
+
|
36 |
+
2. **Clone & Install Dependencies:**
|
37 |
+
```bash
|
38 |
+
git clone https://github.com/YOUR_USERNAME/ContribNavigator.git # Replace with your repo URL
|
39 |
+
cd ContribNavigator
|
40 |
+
python -m venv .venv
|
41 |
+
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
42 |
+
pip install -r requirements.txt
|
43 |
+
```
|
44 |
+
|
45 |
+
3. **Environment Variables:**
|
46 |
+
* Create a `.env` file in the project root (refer to `.env.example` if provided).
|
47 |
+
* Add your API keys:
|
48 |
+
```env
|
49 |
+
GITHUB_PAT="your_github_pat"
|
50 |
+
OPENAI_API_KEY="sk-your_openai_key"
|
51 |
+
# MODAL_TOKEN_ID="mi_..." # Optional for local if `modal token new` was used
|
52 |
+
# MODAL_TOKEN_SECRET="ms_..." # Optional for local
|
53 |
+
```
|
54 |
+
|
55 |
+
4. **Run the Application:**
|
56 |
+
```bash
|
57 |
+
python app.py
|
58 |
+
```
|
59 |
+
|
60 |
+
## Hackathon Context
|
61 |
+
|
62 |
+
* **Project:** ContribNavigator
|
63 |
+
* **Event:** Agents & MCP Hackathon
|
64 |
+
* **Primary Track Submission:** Agentic Demos Track
|
65 |
+
* *Demonstrates an end-to-end AI agent application using Gradio that assists with open-source contributions through intelligent planning and tool utilization (GitHub API, Modal, LLM analysis), reflecting MCP principles.*
|
66 |
+
|
67 |
+
## License
|
68 |
+
|
69 |
+
This project is licensed under the MIT License. See the `LICENSE` file for details.
|
core/__init__.py
ADDED
File without changes
|
core/github_client.py
ADDED
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import os # Good practice
|
3 |
+
# from urllib.parse import quote # We are letting requests handle most param encoding
|
4 |
+
|
5 |
+
# Import the GITHUB_PAT from our config loader
|
6 |
+
from utils.config_loader import GITHUB_PAT # Make sure this path is correct for your setup
|
7 |
+
|
8 |
+
BASE_SEARCH_URL = "https://api.github.com/search/issues"
|
9 |
+
BASE_REPO_URL = "https://api.github.com/repos"
|
10 |
+
|
11 |
+
def _make_github_request(url: str, params: dict = None, headers: dict = None) -> dict | None:
    """Perform an authenticated GET against the GitHub API and return parsed JSON.

    Args:
        url: Fully-qualified GitHub API endpoint.
        params: Optional query parameters, passed straight through to ``requests``.
        headers: Optional extra headers; on conflict they override the defaults.

    Returns:
        The decoded JSON payload as a dict, or None on any failure (missing
        token, timeout, HTTP error, network error, or a non-JSON body).
    """
    # Without a PAT every call would be unauthenticated/rate-limited, so bail early.
    if not GITHUB_PAT:
        print("ERROR (github_client._make_github_request): GITHUB_PAT is not configured.")
        return None

    request_headers = {
        "Authorization": f"token {GITHUB_PAT}",
        "Accept": "application/vnd.github.v3+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    request_headers.update(headers or {})

    try:
        resp = requests.get(url, headers=request_headers, params=params, timeout=15)
        resp.raise_for_status()
        return resp.json()
    except requests.exceptions.Timeout:
        print(f"ERROR (github_client._make_github_request): GitHub API request timed out for URL: {url}")
    except requests.exceptions.HTTPError as http_err:
        # Surface GitHub's own error message and doc link when the error body is JSON.
        error_message = f"ERROR (github_client._make_github_request): GitHub API HTTP error for URL {url}: {http_err}."
        try:
            error_details = http_err.response.json()
            error_message += f" Details: {error_details.get('message', 'No specific message')} Docs: {error_details.get('documentation_url', 'N/A')}"
        except ValueError:  # error body was not JSON
            error_message += f" Response: {http_err.response.text}"
        print(error_message)
    except requests.exceptions.RequestException as req_err:
        print(f"ERROR (github_client._make_github_request): GitHub API request failed for URL {url}: {req_err}")
    except ValueError as json_err:  # resp.json() failed on a 2xx response with a non-JSON body
        print(f"ERROR (github_client._make_github_request): Failed to decode JSON response from URL {url}: {json_err}")
    return None
|
53 |
+
|
54 |
+
def fetch_beginner_issues(
    language: str,
    topics: list[str] | None = None,
    labels: list[str] | None = None,
    sort: str = "updated",
    order: str = "desc",
    per_page: int = 10,
    page: int = 1
) -> list[dict] | None:
    """Search GitHub for open, beginner-friendly public issues.

    Args:
        language: Required primary language filter (normalized to lowercase).
        topics: Optional repository topics to filter on.
        labels: Explicit label override; when None, defaults are chosen based
            on whether topics were supplied.
        sort/order/per_page/page: Passed through to the GitHub search API.

    Returns:
        A list of shaped issue dicts, [] when the API answered without items,
        or None on request failure / missing language.
    """
    if not language:
        print("ERROR (github_client.fetch_beginner_issues): Language parameter is required.")
        return None

    # Decide which labels to filter on: explicit override > topic-focused > broad defaults.
    if labels is not None:
        label_filter = labels
        print(f"GitHub Client: Using explicitly passed labels: {label_filter}")
    elif topics:
        label_filter = ["good first issue"]
        print(f"GitHub Client: Topics specified, using focused labels: {label_filter}")
    else:
        label_filter = ["good first issue", "help wanted", "beginner", "first-timers-only"]
        print(f"GitHub Client: No topics specified, using broader default labels: {label_filter}")

    query_parts = [
        f"language:{language.strip().lower()}",
        "state:open",
        "is:issue",
        "is:public",
    ]

    if label_filter:
        query_parts.extend(
            f'label:"{name.strip()}"' for name in label_filter if name.strip()
        )
    else:  # explicitly passed [] means: no label filter at all
        print("GitHub Client: No labels will be applied to the search query.")

    for raw_topic in (topics or []):
        topic = raw_topic.strip().lower()
        if topic:
            # Multi-word topic phrases need quoting in the search qualifier.
            query_parts.append(f'topic:"{topic}"' if " " in topic else f'topic:{topic}')

    q_string = " ".join(query_parts)
    params = {"q": q_string, "sort": sort, "order": order, "per_page": per_page, "page": page}

    print(f"GitHub Client: Fetching issues with q_string: '{q_string}'")
    data = _make_github_request(BASE_SEARCH_URL, params=params)

    if not data:
        return None
    if "items" not in data:
        print(f"GitHub Client: No 'items' in API response for query '{q_string}'. API Message: {data.get('message', 'N/A')}")
        return []

    def _shape(item: dict) -> dict:
        # An issue html_url looks like <repo html url>/issues/<n>;
        # the first five '/'-separated segments give the repository URL.
        repo_page_url = "/".join(item.get("html_url", "").split('/')[:5])
        return {
            "title": item.get("title"), "html_url": item.get("html_url"),
            "state": item.get("state"), "number": item.get("number"),
            "created_at": item.get("created_at"), "updated_at": item.get("updated_at"),
            "labels": [label_item.get("name") for label_item in item.get("labels", [])],
            "repository_api_url": item.get("repository_url"),
            "repository_html_url": repo_page_url,
            "user_login": item.get("user", {}).get("login"),
            "body_snippet": item.get("body", "")[:300] + "..." if item.get("body") else "No body provided."
        }

    return [_shape(item) for item in data["items"]]
|
139 |
+
|
140 |
+
|
141 |
+
def get_repository_details(repo_api_url: str) -> dict | None:
    """Fetch repository metadata (used mainly to read ``default_branch``).

    Args:
        repo_api_url: Full API URL of the repository ("https://api.github.com/repos/...").

    Returns:
        The parsed JSON dict, or None when the URL is missing or the request fails.
    """
    if repo_api_url:
        print(f"GitHub Client: Fetching repository details from: {repo_api_url}")
        return _make_github_request(repo_api_url)
    print("ERROR (github_client.get_repository_details): No repository API URL provided.")
    return None
|
151 |
+
|
152 |
+
|
153 |
+
def get_file_url_from_repo(repo_full_name: str, file_paths_to_check: list[str], default_branch: str | None = None) -> str | None:
    """Return the HTML URL of the first path in ``file_paths_to_check`` that exists.

    The branch tried is ``default_branch`` when supplied; otherwise the repo's
    default branch is fetched, and if that cannot be determined the common
    fallbacks 'main' then 'master' are attempted.

    Returns:
        The file's html_url, or None when nothing was found on any branch.
    """
    if not repo_full_name or not file_paths_to_check:
        print("ERROR (github_client.get_file_url_from_repo): repo_full_name and file_paths_to_check are required.")
        return None

    branch = default_branch
    if not branch:
        print(f"GitHub Client (get_file_url): No default branch provided for {repo_full_name}, attempting to fetch it.")
        details = get_repository_details(f"{BASE_REPO_URL}/{repo_full_name}")
        branch = (details or {}).get("default_branch")
        if branch:
            print(f"GitHub Client (get_file_url): Fetched default branch '{branch}' for {repo_full_name}.")
        else:
            print(f"GitHub Client (get_file_url): Could not determine default branch for {repo_full_name}. Will try common fallbacks.")

    # One known branch when we have it; otherwise the common fallback names.
    candidate_branches = [branch] if branch else ["main", "master"]

    for candidate in candidate_branches:
        print(f"GitHub Client (get_file_url): Trying branch '{candidate}' for {repo_full_name}.")
        for path in file_paths_to_check:
            # _make_github_request prints its own diagnostics on 404s/other errors,
            # so we stay quiet on misses to avoid noisy per-path logging.
            metadata = _make_github_request(f"{BASE_REPO_URL}/{repo_full_name}/contents/{path}?ref={candidate}")
            if isinstance(metadata, dict) and metadata.get("html_url"):
                print(f"GitHub Client (get_file_url): Found '{path}' in {repo_full_name} on branch '{candidate}'.")
                return metadata.get("html_url")

    print(f"GitHub Client (get_file_url): Could not find any of {file_paths_to_check} in {repo_full_name} on attempted branches.")
    return None
|
197 |
+
|
198 |
+
# --- NEW FUNCTION ---
|
199 |
+
def get_file_content(repo_full_name: str, file_path: str, branch: str | None = None) -> str | None:
    """
    Fetches the raw text content of a specific file from a repository.

    Args:
        repo_full_name: The repository name in "owner/repo" format.
        file_path: The path to the file within the repository (e.g., "CONTRIBUTING.md").
            May contain sub-directories; it is percent-encoded before being
            placed in the request URL.
        branch: The branch to fetch from. If None, attempts to find the default
            branch; failing that, tries 'main' and falls back to 'master' on a 404.

    Returns:
        The text content of the file, or None if not found or an error occurs.
    """
    from urllib.parse import quote  # local import: only needed for URL building here

    if not repo_full_name or not file_path:
        print("ERROR (github_client.get_file_content): repo_full_name and file_path are required.")
        return None

    current_branch = branch
    if not current_branch:
        print(f"GitHub Client (get_file_content): No branch specified for {repo_full_name}/{file_path}, finding default.")
        repo_details = get_repository_details(f"{BASE_REPO_URL}/{repo_full_name}")
        if repo_details and repo_details.get("default_branch"):
            current_branch = repo_details.get("default_branch")
            print(f"GitHub Client (get_file_content): Using default branch '{current_branch}' for {repo_full_name}/{file_path}")
        else:
            print(f"GitHub Client (get_file_content): Could not determine default branch for {repo_full_name}. Trying 'main', then 'master' for {file_path}.")
            # Guess 'main' first; the 404 handler below retries 'master'.
            current_branch = "main"

    # FIX: percent-encode the file path (spaces, '#', '?', non-ASCII, ...) so the
    # contents-API URL stays valid; '/' is preserved as the path separator.
    file_api_url = f"{BASE_REPO_URL}/{repo_full_name}/contents/{quote(file_path)}?ref={current_branch}"
    print(f"GitHub Client (get_file_content): Fetching raw content for '{file_path}' from '{repo_full_name}' on branch '{current_branch}'.")

    if not GITHUB_PAT:
        print("ERROR (github_client.get_file_content): GITHUB_PAT is not configured.")
        return None

    headers = {
        "Authorization": f"token {GITHUB_PAT}",
        "Accept": "application/vnd.github.raw",  # Key header for raw content
        "X-GitHub-Api-Version": "2022-11-28"
    }

    try:
        response = requests.get(file_api_url, headers=headers, timeout=15)
        response.raise_for_status()
        return response.text  # Return raw text content
    except requests.exceptions.Timeout:
        print(f"ERROR (github_client.get_file_content): GitHub API request timed out for URL: {file_api_url}")
        return None
    except requests.exceptions.HTTPError as http_err:
        if http_err.response.status_code == 404:
            print(f"INFO (github_client.get_file_content): File not found (404) at {file_api_url}")
            # When 'main' was only a guess (or explicitly asked for), retry once on 'master'.
            if current_branch == "main" and (not branch or branch == "main"):
                print(f"GitHub Client (get_file_content): '{file_path}' not found on 'main', trying 'master' as fallback.")
                return get_file_content(repo_full_name, file_path, branch="master")
        else:
            error_message = f"ERROR (github_client.get_file_content): GitHub API HTTP error for URL {file_api_url}: {http_err}."
            try:
                error_details = http_err.response.json()  # Some errors might still be JSON
                error_message += f" Details: {error_details.get('message', http_err.response.text)}"
            except ValueError:
                error_message += f" Response: {http_err.response.text}"
            print(error_message)
        return None
    except requests.exceptions.RequestException as req_err:
        print(f"ERROR (github_client.get_file_content): GitHub API request failed for URL {file_api_url}: {req_err}")
        return None
|
core/kit_generator.py
ADDED
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# core/kit_generator.py
|
2 |
+
from .github_client import get_repository_details, get_file_url_from_repo, get_file_content
|
3 |
+
from .modal_processor import get_repo_file_listing_via_modal
|
4 |
+
from .llm_handler import summarize_text_content, suggest_relevant_code_locations
|
5 |
+
|
6 |
+
# --- Helper function to get common repo info needed by multiple sections ---
|
7 |
+
def _get_common_repo_info(issue_data: dict) -> tuple[str | None, str | None, str]:
    """Derive (repo_full_name, branch_from_api, default_branch_name) for an issue.

    ``branch_from_api`` stays None when the default branch could not be fetched,
    in which case ``default_branch_name`` falls back to "main (assumed)".
    """
    html_url = issue_data.get("repository_html_url", "#")
    api_url = issue_data.get("repository_api_url")

    # "https://github.com/<owner>/<repo>/..." -> "<owner>/<repo>"
    full_name = None
    if html_url and html_url.startswith("https://github.com/"):
        segments = html_url.split('/')
        if len(segments) >= 5:
            full_name = f"{segments[3]}/{segments[4]}"

    branch_from_api = None
    if full_name:
        # Trust the issue-supplied API URL only when it looks valid; otherwise rebuild it.
        if not (api_url and api_url.startswith("https://api.github.com/repos/")):
            api_url = f"https://api.github.com/repos/{full_name}"
            print(f"Kit Generator (_get_common_repo_info): Constructed repo_api_url: {api_url}")
        details = get_repository_details(api_url)
        if details and details.get("default_branch"):
            branch_from_api = details.get("default_branch")

    return full_name, branch_from_api, branch_from_api or "main (assumed)"
|
34 |
+
|
35 |
+
# --- Helper functions for generating individual kit sections ---
|
36 |
+
|
37 |
+
def _generate_repo_details_section(issue_data: dict, default_branch_name: str) -> str:
    """Render the issue-details + initial-setup Markdown section of the kit."""
    title = issue_data.get("title", "N/A")
    issue_url = issue_data.get("html_url", "#")
    repo_url = issue_data.get("repository_html_url", "#")

    # Users clone via the canonical ".git" URL; derive the likely checkout dir name.
    clone_url = repo_url if repo_url.endswith(".git") else repo_url + ".git"
    cd_target = repo_url.split('/')[-1] if repo_url != "#" else "repository-name"

    return f"""
## 🔗 Issue Details
- **Issue Link:** [{title}]({issue_url})
- **Repository:** [{repo_url}]({repo_url})

## 🛠️ Initial Setup Guide
1. **Clone the Repository:**
   Open your terminal and run the following command to clone the repository to your local machine:
   ```bash
   git clone {clone_url}
   ```
   *(Note: Ensure you have Git installed. The repository might use a different default branch name than '{default_branch_name}'.)*

2. **Navigate into the Project Directory:**
   ```bash
   cd {cd_target}
   ```
   *(This assumes the directory name matches the repository name. Adjust if needed.)*
"""
|
66 |
+
|
67 |
+
def _generate_contribution_guidelines_section(
    repo_full_name: str | None,
    branch_from_api: str | None
) -> str:
    """Render the contribution-guidelines section: a link plus an optional AI summary."""
    header = "## 📖 Contribution Guidelines\nIt's highly recommended to read the project's contribution guidelines before you start coding.\n"
    link_md = "- **Guidelines Link:** _Could not find contribution guidelines in common locations._"
    summary_md = ""

    if repo_full_name:
        candidate_paths = [
            "CONTRIBUTING.md", ".github/CONTRIBUTING.md", "docs/CONTRIBUTING.md",
            "CONTRIBUTING.rst", ".github/CONTRIBUTING.rst", "CONTRIBUTING"
        ]

        found_url = get_file_url_from_repo(repo_full_name, candidate_paths, default_branch=branch_from_api)

        if found_url:
            link_md = f"- **Guidelines Link:** [{found_url}]({found_url})"

            # Infer which candidate path the URL corresponds to so its content can be fetched.
            matched_path = next((p for p in candidate_paths if p.lower() in found_url.lower()), None)

            if not matched_path:
                summary_md = "\n\n_Could not determine specific path of contribution file for AI summary, but a link was found._"
            else:
                print(f"Kit Generator (_contrib_guidelines): Found guidelines at '{matched_path}'. Fetching content...")
                guidelines_text = get_file_content(repo_full_name, matched_path, branch=branch_from_api)
                if not guidelines_text:
                    summary_md = "\n\n_Could not fetch content of contribution guidelines for AI summary._"
                else:
                    print("Kit Generator (_contrib_guidelines): Content fetched. Requesting LLM summary...")
                    summary = summarize_text_content(guidelines_text, purpose="contribution guidelines")
                    # The LLM helper signals failure via sentinel phrases inside the text.
                    llm_failed = (
                        not summary
                        or "LLM Client not initialized" in summary
                        or "LLM API error" in summary
                        or "No content provided" in summary
                    )
                    if llm_failed:
                        summary_md = "\n\n_AI summary for contribution guidelines could not be generated at this time._"
                        print(f"Kit Generator (_contrib_guidelines): LLM summary failed or returned error: {summary}")
                    else:
                        summary_md = f"\n\n**Key Takeaways (AI Summary):**\n{summary}"

    return f"{header}{link_md}{summary_md}"
|
109 |
+
|
110 |
+
def _generate_modal_repo_structure_section(
    issue_data: dict,
    language_searched: str
) -> str:
    """Render the repo-structure section: a Modal-sourced file listing plus AI file hints."""
    header = "## 📂 Quick Look: Repository Structure (via Modal)\n"
    listing_md = "_Could not retrieve repository file listing at this time._"
    suggestions_md = ""

    repo_url = issue_data.get("repository_html_url", "#")
    snippet = issue_data.get("body_snippet", "No issue description snippet available.")

    if not repo_url or repo_url == "#":
        listing_md = "_Repository URL not available to fetch file listing._"
        return f"{header}{listing_md}{suggestions_md}"

    print(f"Kit Generator (_modal_structure): Requesting file listing for '{repo_url}' via Modal...")
    clone_url = repo_url if repo_url.endswith(".git") else repo_url + ".git"
    result = get_repo_file_listing_via_modal(clone_url)

    if result and result.get("status") == "success":
        files = result.get("files", [])
        if files:
            # Cap the rendered listing so huge repos don't flood the kit.
            display_cap = 15
            bullets = [f"- `{entry}`" for entry in files[:display_cap]]
            if len(files) > display_cap:
                bullets.append(f"- ... and {len(files) - display_cap} more.")
            listing_md = ("Here's a quick look at some top-level files and folders:\n" +
                          "\n".join(bullets))

            print("Kit Generator (_modal_structure): Sending file list and issue snippet to LLM for relevant file suggestions.")
            hints = suggest_relevant_code_locations(
                issue_snippet=snippet,
                file_list=files,
                language=language_searched
            )
            if hints and "LLM Client not initialized" not in hints and "LLM API error" not in hints:
                suggestions_md = f"\n\n**💡 AI Suggested Starting Points (based on issue & file list):**\n{hints}"
            else:
                suggestions_md = "\n\n_AI could not suggest specific files to start with for this issue at this time._"
        else:
            listing_md = "_Repository cloned successfully via Modal, but no files were found at the top level._"
    elif result:
        listing_md = f"_Could not retrieve repository file listing via Modal: {result.get('message', 'Unknown error from Modal')}_"

    return f"{header}{listing_md}{suggestions_md}"
|
154 |
+
|
155 |
+
|
156 |
+
# --- Main New Orchestrating Function ---
|
157 |
+
def generate_kit_from_plan(
    issue_data: dict,
    language_searched: str,
    components_to_include: list[str]
) -> str:
    """Assemble the Markdown onboarding kit for one issue.

    Only the sections named in ``components_to_include`` (the plan produced
    by the LLM planner) are rendered; unknown component names are ignored.

    Args:
        issue_data: Raw issue dict (expects at least "title"; other keys are
            consumed by the per-section helper functions).
        language_searched: Language context the issue search was run with.
        components_to_include: Component identifiers chosen by the planner.

    Returns:
        The full kit as a Markdown string, or an error string when the
        inputs are unusable.
    """
    if not issue_data:
        return "Error: No issue data provided to generate kit."
    if not components_to_include:
        return "Error: No components specified for kit generation plan."

    print(f"Kit Generator (plan): Starting kit generation with components: {components_to_include}")

    # Repo metadata is shared by several sections, so resolve it once up front.
    repo_full_name, branch_from_api, default_branch_name = _get_common_repo_info(issue_data)

    kit_header = f"""
# 👋 Onboarding Kit for: {issue_data.get("title", "N/A")}

Congratulations on choosing this issue! Here's some information to help you get started.
"""
    sections = [kit_header.strip()]
    requested = set(components_to_include)

    if "repo_details_and_clone_command" in requested:
        print("Kit Generator (plan): Adding repo details and clone command.")
        sections.append(_generate_repo_details_section(issue_data, default_branch_name))

    wants_link = "contribution_guidelines_link" in requested
    wants_summary = "contribution_guidelines_summary_ai" in requested
    if wants_link or wants_summary:
        print("Kit Generator (plan): Adding contribution guidelines section (link and/or summary).")
        # The helper produces the link and (when content is found) an AI
        # summary in one string; when the plan asked for the link only, trim
        # the summary portion back off.
        guidelines_md = _generate_contribution_guidelines_section(repo_full_name, branch_from_api)
        summary_marker = "**Key Takeaways (AI Summary):**"
        if wants_link and not wants_summary and summary_marker in guidelines_md:
            guidelines_md = guidelines_md.split(summary_marker)[0].strip()
        sections.append(guidelines_md)

    if "repository_structure_modal_ai" in requested:
        print("Kit Generator (plan): Adding repository structure (Modal) and AI file suggestions.")
        sections.append(_generate_modal_repo_structure_section(issue_data, language_searched))

    # Footer
    sections.append("\nHappy contributing! Remember to communicate with the project maintainers if you have questions.")

    return "\n\n".join(sections).strip()
|
229 |
+
|
230 |
+
|
231 |
+
# --- Backward-compatible wrapper for the pre-planner API ---
def generate_basic_kit_content(issue_data: dict, language_searched: str) -> str:
    """Legacy entry point: generate a kit using a fixed "basic" component plan.

    Kept so existing callers (e.g. app.py) keep working until they migrate to
    generate_kit_from_plan(). Fix: the plan list and the `return` statement
    were previously left at module level after the old function body was
    commented out, which made the bare `return` a SyntaxError at import time;
    they are folded back into a proper function body here.

    Args:
        issue_data: Raw issue dict, forwarded unchanged.
        language_searched: Language context, forwarded unchanged.

    Returns:
        The Markdown kit produced by generate_kit_from_plan().
    """
    print("WARNING: generate_basic_kit_content is called, should be generate_kit_from_plan")
    default_plan_for_basic = [
        "repo_details_and_clone_command",
        "contribution_guidelines_link",  # Basic just wants the link
        # "contribution_guidelines_summary_ai",  # Not in "basic"
        "repository_structure_modal_ai",  # Basic had the modal files and AI suggestions
    ]
    return generate_kit_from_plan(issue_data, language_searched, default_plan_for_basic)
|
core/llm_handler.py
ADDED
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import openai  # OpenAI-compatible chat-completions SDK
import os  # For environment variables if not using config_loader directly here
import json
# API key is loaded centrally by the config loader.
from utils.config_loader import OPENAI_API_KEY


def _build_openai_client():
    """Create the module-wide OpenAI client, or return None when the key is
    missing or construction fails (each case is logged to stdout)."""
    if not OPENAI_API_KEY:
        print("WARNING (llm_handler): OPENAI_API_KEY not configured. LLM calls will fail.")
        return None
    try:
        new_client = openai.OpenAI(
            api_key=OPENAI_API_KEY
            # No base_url needed for direct OpenAI
        )
        print("OpenAI client initialized successfully in llm_handler.")
        return new_client
    except Exception as exc:
        print(f"Error initializing OpenAI client in llm_handler: {exc}")
        return None


# Shared client used by every function in this module; None when unavailable.
client = _build_openai_client()
|
21 |
+
|
22 |
+
|
23 |
+
def get_simple_issue_suggestion(
    issues_data: list[dict],
    language: str,
    target_count: int = 1,
    model_name: str = "gpt-4o-mini",
    additional_prompt_context: str = ""
) -> str | None:
    """
    Ask the OpenAI chat API which of the fetched issues best suits a beginner.

    Args:
        issues_data: Issue dicts (uses keys "title", "html_url", "labels",
            "body_snippet") presented to the model.
        language: Language the user searched for; only steers the prompt.
        target_count: How many issues the model should recommend.
        model_name: Chat-completion model to use.
        additional_prompt_context: Extra instructions appended to the system prompt.

    Returns:
        The model's recommendation text, or a human-readable error string when
        the client is missing, the input is empty, or the API call fails.
    """
    if not client:
        print("LLM client (OpenAI) in get_simple_issue_suggestion is not initialized.")
        return "LLM client (OpenAI) not initialized. Check API Key configuration."
    if not issues_data:
        print("No issues provided to LLM for suggestion.")
        return "No issues provided to LLM for suggestion."

    # Render each issue as a compact text card for the prompt.
    # (Fix: build via list + join instead of repeated string concatenation.)
    issue_cards = []
    for i, issue in enumerate(issues_data):
        snippet = issue.get('body_snippet', 'No description available.')
        title = issue.get('title', 'No title')
        url = issue.get('html_url', '#')
        labels = ", ".join(issue.get('labels', [])) if issue.get('labels') else "No labels"
        issue_cards.append(
            f"\n--- Issue {i+1} ---\n"
            f"Title: {title}\nURL: {url}\nLabels: {labels}\nSnippet from body: {snippet}\n-----------------\n"
        )
    prompt_issues_str = "".join(issue_cards)

    system_prompt = (
        "You are an expert assistant helping a new open-source contributor. "
        "Your task is to analyze the provided list of GitHub issues and recommend "
        f"the top {target_count} that would be most suitable for a beginner ideally in {language} (if specified and makes sense for the issues). "
        "Consider factors like clarity, labels, and apparent scope. "
        f"{additional_prompt_context}"
        " If the user-specified language seems mismatched with the provided issues, please make your best judgment "
        "based on the issue content itself or note the potential mismatch in your recommendation."
    )
    user_prompt = (
        f"Here is a list of GitHub issues found when searching for the language '{language}'. "
        f"Please review them and suggest the top {target_count} issue(s) that seem most suitable for a beginner. "
        f"For each suggested issue, provide a concise explanation (1-2 sentences) stating *why* it's a good choice for a beginner. "
        f"If you suggest an issue, please refer to it by its number (e.g., 'Issue 1')."
        f"\nHere are the issues:\n{prompt_issues_str}"
    )

    # Single source of truth for sampling parameters. Fix: these values were
    # previously computed here and then re-hardcoded as literals in the API
    # call below, so the printed values could drift from the ones actually used.
    temperature_val = 0.4
    max_tokens_val = 200 + (target_count * 150)  # scale budget with number of picks
    top_p_val = 0.9

    print(f"\nSending request to OpenAI LLM for issue suggestion...")
    print(f"Model: {model_name}, Temp: {temperature_val}, MaxTokens: {max_tokens_val}")

    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temperature_val,
            max_tokens=max_tokens_val,
            top_p=top_p_val
        )

        suggestion_text = completion.choices[0].message.content
        print("OpenAI LLM Suggestion Received.")
        return suggestion_text.strip()

    except openai.APIConnectionError as e:
        print(f"OpenAI API Connection Error: {e}")
        return f"LLM suggestion failed due to connection error: {e}"
    except openai.RateLimitError as e:
        print(f"OpenAI API Rate Limit Error: {e}")
        return f"LLM suggestion failed due to rate limit: {e}. Check your OpenAI plan and usage."
    except openai.AuthenticationError as e:
        print(f"OpenAI API Authentication Error: {e}. Check your OPENAI_API_KEY.")
        return f"LLM suggestion failed due to authentication error: {e}."
    except openai.APIStatusError as e:
        print(f"OpenAI API Status Error: Status {e.status_code} - Response: {e.response}")
        return f"LLM suggestion failed due to API status error: {e.status_code}"
    except Exception as e:
        print(f"LLM API call to OpenAI failed with an unexpected error: {e}")
        print(f"Type of error: {type(e)}")
        return f"LLM suggestion failed with an unexpected error: {e}"
|
110 |
+
|
111 |
+
# --- NEW FUNCTION 1: Summarize Text Content ---
|
112 |
+
def summarize_text_content(
    text_content: str,
    purpose: str = "contribution guidelines",
    max_summary_tokens: int = 200,
    model_name: str = "gpt-4o-mini"
) -> str | None:
    """Produce a short, beginner-focused summary of *text_content* via the LLM.

    Documents shorter than ~75 words are returned directly (possibly as a
    truncated quote) instead of spending an API call; client/API failures
    yield a human-readable error string.
    """
    if not client:
        print("ERROR (llm_handler.summarize_text_content): LLM client not initialized.")
        return "LLM Client not initialized. Cannot summarize."
    if not text_content or not text_content.strip():
        print("Warning (llm_handler.summarize_text_content): No text content provided to summarize.")
        return "No content provided for summarization."

    # Skip the API round-trip entirely for tiny documents.
    if len(text_content.split()) < 75:
        print("Info (llm_handler.summarize_text_content): Content too short, returning as is or snippet.")
        if len(text_content) > 500:
            return f"The {purpose} document is brief: \"{text_content[:500]}...\""
        return text_content

    system_prompt = (
        f"You are an expert summarizer. Your task is to provide a concise summary of the following '{purpose}' document. "
        "Focus on the most critical information a new contributor would need. "
        "For contribution guidelines, highlight key setup steps, coding style conventions, testing requirements, and pull request procedures. "
        "Keep the summary brief and actionable."
    )
    # Cap the document at 8000 characters to stay inside context/cost limits.
    user_prompt = (
        f"Please summarize the key points of the following {purpose} document:\n\n"
        f"```text\n{text_content[:8000]}\n```"
    )

    print(f"LLM Handler: Sending request to summarize {purpose}. Model: {model_name}")
    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            temperature=0.2,  # factual task: keep sampling tight
            max_tokens=max_summary_tokens,
            top_p=1.0
        )
        summary = response.choices[0].message.content
        print(f"LLM Handler: Summary for {purpose} received.")
        return summary.strip()
    except Exception as exc:
        print(f"ERROR (llm_handler.summarize_text_content): LLM API call failed: {exc}")
        return f"Could not summarize the {purpose}: LLM API error."
|
163 |
+
|
164 |
+
# --- NEW FUNCTION 2: Suggest Relevant Code Locations ---
|
165 |
+
def suggest_relevant_code_locations(
    issue_snippet: str,
    file_list: list[str],
    language: str,
    max_suggestion_tokens: int = 200,
    model_name: str = "gpt-4o-mini"
) -> str | None:
    """Ask the LLM which top-level files/folders look relevant to an issue.

    Given the issue's description snippet and the repository's top-level
    listing, returns 2-3 suggested starting points with one-line reasons, or
    a human-readable error string on bad input / client or API failure.
    """
    if not client:
        print("ERROR (llm_handler.suggest_relevant_code_locations): LLM client not initialized.")
        return "LLM Client not initialized. Cannot suggest locations."
    if not issue_snippet or not issue_snippet.strip():
        return "No issue description provided to suggest locations."
    if not file_list:
        return "No file list provided to suggest locations from."

    # One bullet per entry; fall back to a placeholder if the join is empty.
    formatted_file_list = "\n".join(f"- `{entry}`" for entry in file_list) or "No files listed."

    system_prompt = (
        f"You are an AI assistant helping a software developer navigate a new '{language}' codebase. "
        "Your goal is to identify potentially relevant files or folders for a given issue, based on a provided list of top-level project files/folders."
    )
    user_prompt = (
        f"A developer is starting work on an issue with the following description snippet:\n"
        f"'''\n{issue_snippet}\n'''\n\n"
        f"The top-level files and folders available in the repository are:\n"
        f"{formatted_file_list}\n\n"
        f"Based *only* on the issue snippet and this file list, please suggest 2-3 files or folders that might be most relevant for investigating this issue. "
        f"For each suggestion, provide a brief (1-sentence) explanation of why it might be relevant. "
        f"If no files seem obviously relevant from the top-level list, say so."
    )

    print(f"LLM Handler: Sending request to suggest relevant code locations. Model: {model_name}")
    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            temperature=0.5,  # a little room for reasoning about relevance
            max_tokens=max_suggestion_tokens,
            top_p=1.0
        )
        print("LLM Handler: Code location suggestions received.")
        return response.choices[0].message.content.strip()
    except Exception as exc:
        print(f"ERROR (llm_handler.suggest_relevant_code_locations): LLM API call failed: {exc}")
        return f"Could not suggest code locations: LLM API error."
|
217 |
+
|
218 |
+
def plan_onboarding_kit_components(
    issue_data: dict,
    language_searched: str,
    model_name: str = "gpt-4.1-mini"
) -> dict | None:
    """
    Uses an LLM to decide which onboarding kit components are most relevant
    for a given issue.

    Args:
        issue_data: Issue dict (uses "title", "body_snippet", "labels").
        language_searched: Language context of the original issue search.
        model_name: Chat-completion model used for planning.

    Returns:
        {"include_components": [...]} on success (names not in the known
        component set are filtered out), a {"error": ...} dict on API/parse
        failure, or None when the client or issue data is missing.
    """
    if not client:
        print("ERROR (llm_handler.plan_kit): LLM client not initialized.")
        return None
    if not issue_data:
        print("ERROR (llm_handler.plan_kit): No issue data provided for planning.")
        return None

    issue_title = issue_data.get("title", "N/A")
    issue_snippet = issue_data.get("body_snippet", "No description available.")
    issue_labels = issue_data.get("labels", [])

    # The closed set of component names the planner may choose from; anything
    # else the model returns is dropped during validation below.
    available_components = [
        "repo_details_and_clone_command",       # Basic repo info, clone command
        "contribution_guidelines_link",         # Link to CONTRIBUTING.md
        "contribution_guidelines_summary_ai",   # AI Summary of CONTRIBUTING.md
        "repository_structure_modal_ai",        # File listing via Modal + AI suggested files
    ]
    components_description = (
        "- repo_details_and_clone_command: Basic repository information and git clone command.\n"
        "- contribution_guidelines_link: A direct link to the project's CONTRIBUTING.md file (if found).\n"
        "- contribution_guidelines_summary_ai: An AI-generated summary of the key points from CONTRIBUTING.md.\n"
        "- repository_structure_modal_ai: A top-level file/folder listing from a repository clone (via Modal), followed by AI suggestions for relevant files based on the issue."
    )

    system_prompt = (
        "You are an expert onboarding assistant for open-source contributors. Your task is to intelligently plan "
        "the components of an onboarding kit that would be most helpful for a developer tackling a specific GitHub issue. "
        "You must respond ONLY with a valid JSON object containing a single key 'include_components' whose value is a list of strings, "
        "where each string is one of the component names provided."
    )
    user_prompt = (
        f"Based on the following GitHub issue details for a project searched under the language context '{language_searched}':\n"
        f"Issue Title: \"{issue_title}\"\n"
        f"Issue Snippet: \"{issue_snippet}\"\n"
        f"Issue Labels: {issue_labels}\n\n"
        f"And considering the following available onboarding kit components and their descriptions:\n"
        f"{components_description}\n\n"
        f"Which components should be included in the onboarding kit for this specific issue to be most helpful? "
        f"For example, if the issue is a very simple documentation typo, a full 'repository_structure_modal_ai' might be overkill. "
        f"If no contribution guidelines are typically found for a project, 'contribution_guidelines_summary_ai' would not be applicable. (You don't know this yet, but keep it in mind for general reasoning). "
        f"Prioritize helpfulness for a beginner. Respond ONLY with a JSON object in the format: "
        f"{{\"include_components\": [\"component_name_1\", \"component_name_2\", ...]}}"
    )

    print(f"LLM Handler (plan_kit): Sending request to plan kit components. Model: {model_name}")
    raw_response_content = ""  # defined up front so the error handlers can always reference it
    try:
        completion_params = {
            "model": model_name,
            "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
            "temperature": 0.2,  # low temperature for deterministic structural output
            "max_tokens": 200,   # the JSON plan is small
            "top_p": 1.0,
        }
        # Request strict JSON mode on models known to support response_format.
        # Fix: the gpt-4.1 family (including this function's default model)
        # supports JSON mode but was previously excluded from this check, so
        # the default path never got strict JSON enforcement.
        if ("gpt-4o" in model_name or "gpt-4.1" in model_name
                or "gpt-3.5-turbo-0125" in model_name or "gpt-3.5-turbo-1106" in model_name):
            completion_params["response_format"] = {"type": "json_object"}

        completion = client.chat.completions.create(**completion_params)

        raw_response_content = completion.choices[0].message.content
        print(f"LLM Handler (plan_kit): Raw JSON response received: {raw_response_content}")

        # Parse and validate the plan structure.
        parsed_plan = json.loads(raw_response_content)
        if "include_components" in parsed_plan and isinstance(parsed_plan["include_components"], list):
            # Keep only names from the known component set.
            valid_components = [comp for comp in parsed_plan["include_components"] if comp in available_components]
            if len(valid_components) != len(parsed_plan["include_components"]):
                print("Warning (llm_handler.plan_kit): LLM returned some invalid component names.")

            final_plan = {"include_components": valid_components}
            print(f"LLM Handler (plan_kit): Parsed plan: {final_plan}")
            return final_plan
        else:
            print("ERROR (llm_handler.plan_kit): LLM response was not in the expected JSON format (missing 'include_components' list).")
            return {"error": "LLM response format error", "details": "Missing 'include_components' list."}

    except json.JSONDecodeError as json_e:
        print(f"ERROR (llm_handler.plan_kit): Failed to decode JSON from LLM response. Error: {json_e}. Response was: {raw_response_content}")
        return {"error": "JSON decode error", "details": str(json_e), "raw_response": raw_response_content}
    except Exception as e:
        print(f"ERROR (llm_handler.plan_kit): LLM API call failed: {e}")
        return {"error": f"LLM API call failed: {str(e)}"}
|
core/modal_processor.py
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# core/modal_processor.py
|
2 |
+
|
3 |
+
# CHANGE THIS IMPORT:
|
4 |
+
# from ..modal_definitions import stub as an_individual_modal_app_instance_name
|
5 |
+
# from ..modal_definitions import clone_and_list_files_on_modal
|
6 |
+
# TO THIS (assuming modal_definitions.py is in the project root):
|
7 |
+
from modal_definitions import stub as an_individual_modal_app_instance_name
|
8 |
+
from modal_definitions import clone_and_list_files_on_modal
|
9 |
+
|
10 |
+
# (get_repo_file_listing_via_modal function definition remains the same)
|
11 |
+
def get_repo_file_listing_via_modal(repo_url: str) -> dict | None:
|
12 |
+
if not repo_url:
|
13 |
+
print("Error (modal_processor): No repository URL provided.")
|
14 |
+
return {"status": "error", "message": "No repository URL provided."}
|
15 |
+
|
16 |
+
print(f"Modal Processor: Attempting to get file listing for {repo_url} via Modal...")
|
17 |
+
try:
|
18 |
+
with an_individual_modal_app_instance_name.run():
|
19 |
+
result_dict = clone_and_list_files_on_modal.remote(repo_url)
|
20 |
+
print(f"Modal Processor: Result received from Modal for {repo_url}: {result_dict}")
|
21 |
+
return result_dict
|
22 |
+
except Exception as e:
|
23 |
+
print(f"Error (modal_processor): Failed to invoke or communicate with Modal function for {repo_url}. Exception: {e}")
|
24 |
+
return {"status": "error", "message": f"Failed to invoke Modal function: {str(e)}"}
|
25 |
+
|
26 |
+
|
27 |
+
if __name__ == '__main__':
    # Manual smoke test. Run from the project root (contrib_navigator/), e.g.
    #   python -m core.modal_processor
    # so that `modal_definitions` in the project root is importable via the
    # top-of-file imports.
    print("Running modal_processor.py directly for testing...")
    test_url_gradio = "https://github.com/gradio-app/gradio.git"

    print(f"\nTesting with URL: {test_url_gradio}")
    response = get_repo_file_listing_via_modal(test_url_gradio)
    if not response or response.get("status") != "success":
        print(f"Failed or got unexpected response for {test_url_gradio}: {response}")
    else:
        # Only show the first few entries to keep the output readable.
        print(f"Success! Files for {test_url_gradio}: {response.get('files')[:5]}...")
|
modal_definitions.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# modal_definitions.py
import modal
import subprocess
import tempfile
import os

# Shared Modal app under which the repo-inspection function below is registered.
stub = modal.App(name="contrib-navigator-repo-inspector")

# Minimal Debian-based image with git installed, used by the clone function.
git_image = modal.Image.debian_slim(python_version="3.12").apt_install("git")
|
13 |
+
|
14 |
+
# (clone_and_list_files_on_modal function remains the same)
|
15 |
+
@stub.function(
    image=git_image,
    timeout=120,  # hard cap for the whole remote call
    retries=modal.Retries(max_retries=1, initial_delay=2.0, backoff_coefficient=1.0)
)
def clone_and_list_files_on_modal(repo_url: str) -> dict:
    """Shallow-clone *repo_url* inside a Modal sandbox and list its top level.

    Returns:
        On success: {"status": "success", "files": [...], "cloned_path_on_modal": ...}.
        Note the listing is a raw os.listdir() (so it includes entries such as
        the ".git" directory), and the returned path points into a temporary
        directory that is deleted when this function returns — it is for
        display/debugging only.
        On any failure: {"status": "error", "message": <description>}.
    """
    print(f"Modal function received URL to clone: {repo_url}")
    with tempfile.TemporaryDirectory() as tmpdir:
        clone_target_path = os.path.join(tmpdir, "cloned_repo")
        try:
            command = ["git", "clone", "--depth", "1", repo_url, clone_target_path]
            print(f"Executing command in Modal: {' '.join(command)}")
            # check=True raises CalledProcessError on non-zero exit; the
            # CompletedProcess object itself is not needed (fix: it was
            # previously bound to an unused local).
            subprocess.run(
                command, check=True, capture_output=True, text=True, timeout=90
            )
            cloned_files = os.listdir(clone_target_path)
            print(f"Successfully cloned and listed files for {repo_url}. Files: {cloned_files}")
            return {"status": "success", "files": cloned_files, "cloned_path_on_modal": clone_target_path}
        except subprocess.TimeoutExpired:
            error_message = f"Git clone command timed out in Modal for {repo_url}."
            print(error_message)
            return {"status": "error", "message": error_message}
        except subprocess.CalledProcessError as e:
            # Surface git's own stderr/stdout to make clone failures debuggable.
            error_message = (
                f"Failed to clone {repo_url} in Modal. "
                f"Git command return code: {e.returncode}. "
                f"Stderr: {e.stderr.strip() if e.stderr else 'N/A'}. "
                f"Stdout: {e.stdout.strip() if e.stdout else 'N/A'}."
            )
            print(error_message)
            return {"status": "error", "message": error_message}
        except FileNotFoundError:
            error_message = "Git command not found in Modal environment. Image build issue."
            print(error_message)
            return {"status": "error", "message": error_message}
        except Exception as e:
            error_message = f"An unexpected error occurred in Modal function for {repo_url}: {str(e)}"
            print(error_message)
            return {"status": "error", "message": error_message}
|
56 |
+
|
57 |
+
|
58 |
+
# --- Optional: local testing entrypoint for this Modal function ---
@stub.local_entrypoint()
async def test_clone_function_on_modal():
    """Invoke the remote clone function against a known-good public repo."""
    known_good_repo = "https://github.com/gradio-app/gradio.git"
    print(f"\n[Local Test] Calling Modal function for successful clone: {known_good_repo}")
    clone_outcome = await clone_and_list_files_on_modal.remote.aio(known_good_repo)
    print(f"[Local Test] Result from Modal for successful clone: {clone_outcome}\n")
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
python-dotenv
|
2 |
+
requests
|
3 |
+
openai
|
4 |
+
gradio
|
5 |
+
modal
|
run_day1_tests.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from core.github_client import fetch_beginner_issues
|
2 |
+
from core.llm_handler import get_simple_issue_suggestion
|
3 |
+
import utils.config_loader # This loads .env and makes variables available
|
4 |
+
|
5 |
+
def main_test_runner():
    """Day 1 end-to-end smoke test.

    Steps:
      1. Verify GITHUB_PAT is configured (mandatory — aborts otherwise).
      2. Fetch up to 5 beginner-friendly Python issues from GitHub.
      3. If issues were found and OPENAI_API_KEY is configured, ask the LLM
         for a single suggested starting issue and print its answer.

    All results are reported via ``print``; nothing is returned.
    """
    print("--- ContribNavigator Day 1 Test Runner ---")

    # GitHub PAT is essential for the first step, so stop early without it.
    if not utils.config_loader.GITHUB_PAT:
        print("CRITICAL: GITHUB_PAT not loaded. Please check your .env file.")
        return

    # The OpenAI key is only needed for the LLM half; keep going without it so
    # the GitHub fetch can still be inspected.
    if not utils.config_loader.OPENAI_API_KEY:
        print("CRITICAL: OPENAI_API_KEY not loaded. LLM calls will fail. Please check your .env file.")
    else:
        print("OpenAI API Key found. Ready for LLM tests.")

    print("\n--- Testing GitHub Issue Fetching ---")
    target_language = "python"
    print(f"Attempting to fetch up to 5 '{target_language}' issues with default labels...")
    # Uses fetch_beginner_issues' default labels: ["good first issue", "help wanted"].
    issues = fetch_beginner_issues(target_language, per_page=5)

    if issues is None:
        # None signals an API/request failure (as opposed to an empty result).
        print("Failed to fetch issues from GitHub. There might have been an API request error. Check console for details from github_client.")
    elif not issues:
        print(f"No issues found for '{target_language}' with the default labels. Cannot proceed to LLM test.")
    else:
        print(f"\nSuccessfully fetched {len(issues)} issues for '{target_language}':")
        for i, issue_item in enumerate(issues):
            print(f"  {i+1}. Title: {issue_item.get('title')}")
            print(f"     URL: {issue_item.get('html_url')}")
            print(f"     Repo: {issue_item.get('repository_html_url')}")
            print(f"     Labels: {issue_item.get('labels')}")
            print("-" * 20)

        # LLM test runs only inside this branch, so issues is guaranteed
        # non-empty here (robustness: issues[:3] can never see None).
        print("\n--- Testing LLM Suggestion (OpenAI) ---")
        if not utils.config_loader.OPENAI_API_KEY:
            print("OPENAI_API_KEY not configured in .env. Skipping LLM test.")
        else:
            issues_for_llm = issues[:3]  # cap the prompt at the first 3 issues
            if issues_for_llm:
                print(f"\nSending {len(issues_for_llm)} issue(s) to OpenAI LLM for suggestion (expecting 1 suggestion)...")
                # Uses the handler's default model; request exactly one suggestion.
                suggestion = get_simple_issue_suggestion(issues_for_llm, target_language, target_count=1)

                print("\nLLM Suggestion Output:")
                if suggestion:
                    print(suggestion)
                else:
                    # llm_handler prints its own specific errors on failure.
                    print("LLM did not return a suggestion or an error occurred (see logs above from llm_handler).")
            else:
                # Unreachable while issues is non-empty; kept as a safety net.
                print("No issues were available to send to LLM for suggestion.")

    print("\n--- Day 1 Full Test Complete ---")
|
64 |
+
|
65 |
+
# Script entry point: run the Day 1 smoke test only when executed directly.
if __name__ == "__main__":
    main_test_runner()
|
utils/config_loader.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
|
4 |
+
def load_app_config():
    """Populate the process environment with key=value pairs from a .env file."""
    load_dotenv()
|
7 |
+
|
8 |
+
# Populate os.environ from .env the moment this module is imported, so the
# getenv calls below see the file's values.
load_app_config()

# Public configuration values consumed by the rest of the app.
GITHUB_PAT = os.getenv("GITHUB_PAT")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Modal credentials (e.g. MODAL_TOKEN_ID, MODAL_TOKEN_SECRET) can be exposed
# here later once scripts need them.

# Surface missing keys at startup so later API failures are explainable.
if not GITHUB_PAT:
    print("WARNING: GITHUB_PAT not found in .env")
if not OPENAI_API_KEY:
    print("WARNING: OPENAI_API_KEY not found in .env")
|