# ContribNavigator: core/llm_handler.py
import openai  # Official OpenAI client library (the code below talks to the OpenAI API directly)
import json

# Import the API key from our config loader
from utils.config_loader import OPENAI_API_KEY
# Initialize the OpenAI client
client = None
if OPENAI_API_KEY:
    try:
        client = openai.OpenAI(
            api_key=OPENAI_API_KEY
            # No base_url needed when talking to the OpenAI API directly
        )
        print("OpenAI client initialized successfully in llm_handler.")
    except Exception as e:
        print(f"Error initializing OpenAI client in llm_handler: {e}")
        client = None
else:
    print("WARNING (llm_handler): OPENAI_API_KEY not configured. LLM calls will fail.")
def get_simple_issue_suggestion(
    issues_data: list[dict],
    language: str,
    target_count: int = 1,
    model_name: str = "gpt-4o-mini",  # Or your preferred model
    additional_prompt_context: str = ""
) -> str | None:
    """
    Sends issue data to the OpenAI API to suggest which issue(s) might be best for a beginner.
    """
    if not client:
        print("LLM client (OpenAI) in get_simple_issue_suggestion is not initialized.")
        return "LLM client (OpenAI) not initialized. Check API Key configuration."
    if not issues_data:
        print("No issues provided to LLM for suggestion.")
        return "No issues provided to LLM for suggestion."

    # Build a numbered, human-readable block describing each issue for the prompt.
    prompt_issues_str = ""
    for i, issue in enumerate(issues_data):
        snippet = issue.get('body_snippet', 'No description available.')
        title = issue.get('title', 'No title')
        url = issue.get('html_url', '#')
        labels = ", ".join(issue.get('labels', [])) if issue.get('labels') else "No labels"
        prompt_issues_str += (
            f"\n--- Issue {i+1} ---\n"
            f"Title: {title}\nURL: {url}\nLabels: {labels}\nSnippet from body: {snippet}\n-----------------\n"
        )
    system_prompt = (
        "You are an expert assistant helping a new open-source contributor. "
        "Your task is to analyze the provided list of GitHub issues and recommend "
        f"the top {target_count} that would be most suitable for a beginner, ideally in {language} "
        "(if specified and sensible for the issues). "
        "Consider factors like clarity, labels, and apparent scope. "
        f"{additional_prompt_context}"
        " If the user-specified language seems mismatched with the provided issues, please make your best judgment "
        "based on the issue content itself or note the potential mismatch in your recommendation."
    )
    user_prompt = (
        f"Here is a list of GitHub issues found when searching for the language '{language}'. "
        f"Please review them and suggest the top {target_count} issue(s) that seem most suitable for a beginner. "
        f"For each suggested issue, provide a concise explanation (1-2 sentences) stating *why* it's a good choice for a beginner. "
        f"If you suggest an issue, please refer to it by its number (e.g., 'Issue 1')."
        f"\nHere are the issues:\n{prompt_issues_str}"
    )
    temperature_val = 0.4
    max_tokens_val = 200 + (target_count * 150)  # Scale the token budget with the number of requested suggestions
    top_p_val = 0.9

    print("\nSending request to OpenAI LLM for issue suggestion...")
    print(f"Model: {model_name}, Temp: {temperature_val}, MaxTokens: {max_tokens_val}")
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=temperature_val,
            max_tokens=max_tokens_val,
            top_p=top_p_val
        )
        suggestion_text = completion.choices[0].message.content
        print("OpenAI LLM Suggestion Received.")
        return suggestion_text.strip()
    except openai.APIConnectionError as e:
        print(f"OpenAI API Connection Error: {e}")
        return f"LLM suggestion failed due to connection error: {e}"
    except openai.RateLimitError as e:
        print(f"OpenAI API Rate Limit Error: {e}")
        return f"LLM suggestion failed due to rate limit: {e}. Check your OpenAI plan and usage."
    except openai.AuthenticationError as e:
        print(f"OpenAI API Authentication Error: {e}. Check your OPENAI_API_KEY.")
        return f"LLM suggestion failed due to authentication error: {e}"
    except openai.APIStatusError as e:
        print(f"OpenAI API Status Error: Status {e.status_code} - Response: {e.response}")
        return f"LLM suggestion failed due to API status error: {e.status_code}"
    except Exception as e:
        print(f"LLM API call to OpenAI failed with an unexpected error: {e}")
        print(f"Type of error: {type(e)}")
        return f"LLM suggestion failed with an unexpected error: {e}"
# --- Summarize Text Content ---
def summarize_text_content(
    text_content: str,
    purpose: str = "contribution guidelines",  # e.g., "issue description", "documentation section"
    max_summary_tokens: int = 200,  # Adjust as needed
    model_name: str = "gpt-4o-mini"  # Or your preferred model
) -> str | None:
    """
    Summarizes the given text content using an LLM.
    """
    if not client:
        print("ERROR (llm_handler.summarize_text_content): LLM client not initialized.")
        return "LLM Client not initialized. Cannot summarize."
    if not text_content or not text_content.strip():
        print("Warning (llm_handler.summarize_text_content): No text content provided to summarize.")
        return "No content provided for summarization."

    # Heuristic: if the text is already short, skip the API call and return it
    # (or a snippet) directly. The 75-word threshold is arbitrary.
    if len(text_content.split()) < 75:
        print("Info (llm_handler.summarize_text_content): Content too short, returning as is or snippet.")
        return f"The {purpose} document is brief: \"{text_content[:500]}...\"" if len(text_content) > 500 else text_content
    system_prompt = (
        f"You are an expert summarizer. Your task is to provide a concise summary of the following '{purpose}' document. "
        "Focus on the most critical information a new contributor would need. "
        "For contribution guidelines, highlight key setup steps, coding style conventions, testing requirements, and pull request procedures. "
        "Keep the summary brief and actionable."
    )
    user_prompt = (
        f"Please summarize the key points of the following {purpose} document:\n\n"
        f"```text\n{text_content[:8000]}\n```"
        # Using 8000 characters as a rough limit to fit within context windows and manage cost.
        # Adjust this based on typical CONTRIBUTING.md length and model context limits.
    )
print(f"LLM Handler: Sending request to summarize {purpose}. Model: {model_name}")
try:
completion = client.chat.completions.create(
model=model_name,
messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
temperature=0.2, # Lower temperature for factual summarization
max_tokens=max_summary_tokens,
top_p=1.0
)
summary_text = completion.choices[0].message.content
print(f"LLM Handler: Summary for {purpose} received.")
return summary_text.strip()
except Exception as e:
print(f"ERROR (llm_handler.summarize_text_content): LLM API call failed: {e}")
return f"Could not summarize the {purpose}: LLM API error."
# --- Suggest Relevant Code Locations ---
def suggest_relevant_code_locations(
    issue_snippet: str,
    file_list: list[str],
    language: str,  # Language of the project
    max_suggestion_tokens: int = 200,  # Adjust as needed
    model_name: str = "gpt-4o-mini"  # Or your preferred model
) -> str | None:
    """
    Suggests relevant files/folders based on an issue snippet and a list of files.
    """
    if not client:
        print("ERROR (llm_handler.suggest_relevant_code_locations): LLM client not initialized.")
        return "LLM Client not initialized. Cannot suggest locations."
    if not issue_snippet or not issue_snippet.strip():
        return "No issue description provided to suggest locations."
    if not file_list:
        return "No file list provided to suggest locations from."

    # Format the file list as a markdown bullet list for the prompt
    # (file_list is guaranteed non-empty by the guard above).
    formatted_file_list = "\n".join(f"- `{f}`" for f in file_list)
    system_prompt = (
        f"You are an AI assistant helping a software developer navigate a new '{language}' codebase. "
        "Your goal is to identify potentially relevant files or folders for a given issue, based on a provided list of top-level project files/folders."
    )
    user_prompt = (
        f"A developer is starting work on an issue with the following description snippet:\n"
        f"'''\n{issue_snippet}\n'''\n\n"
        f"The top-level files and folders available in the repository are:\n"
        f"{formatted_file_list}\n\n"
        f"Based *only* on the issue snippet and this file list, please suggest 2-3 files or folders that might be most relevant for investigating this issue. "
        f"For each suggestion, provide a brief (1-sentence) explanation of why it might be relevant. "
        f"If no files seem obviously relevant from the top-level list, say so."
    )
print(f"LLM Handler: Sending request to suggest relevant code locations. Model: {model_name}")
try:
completion = client.chat.completions.create(
model=model_name,
messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
temperature=0.5, # Moderate temperature for some reasoning
max_tokens=max_suggestion_tokens,
top_p=1.0
)
suggestion_text = completion.choices[0].message.content
print("LLM Handler: Code location suggestions received.")
return suggestion_text.strip()
except Exception as e:
print(f"ERROR (llm_handler.suggest_relevant_code_locations): LLM API call failed: {e}")
return f"Could not suggest code locations: LLM API error."
def plan_onboarding_kit_components(
    issue_data: dict,
    language_searched: str,
    model_name: str = "gpt-4.1-mini"  # Or your preferred model
) -> dict | None:
    """
    Uses an LLM to decide which onboarding kit components are most relevant for a given issue.
    Returns a dictionary based on the LLM's JSON output.
    """
    if not client:
        print("ERROR (llm_handler.plan_kit): LLM client not initialized.")
        return None  # Or: {"error": "LLM Client not initialized"}
    if not issue_data:
        print("ERROR (llm_handler.plan_kit): No issue data provided for planning.")
        return None  # Or: {"error": "No issue data"}

    issue_title = issue_data.get("title", "N/A")
    issue_snippet = issue_data.get("body_snippet", "No description available.")
    issue_labels = issue_data.get("labels", [])
    # Define the kit components the LLM may choose from
    available_components = [
        "repo_details_and_clone_command",      # Basic repo info, clone command
        "contribution_guidelines_link",        # Link to CONTRIBUTING.md
        "contribution_guidelines_summary_ai",  # AI summary of CONTRIBUTING.md
        "repository_structure_modal_ai",       # File listing via Modal + AI-suggested files
        # "repository_structure_modal_ai" could be split further if needed:
        # "repository_files_modal_raw_list",
        # "ai_suggested_start_files_from_list"
    ]
    components_description = (
        "- repo_details_and_clone_command: Basic repository information and git clone command.\n"
        "- contribution_guidelines_link: A direct link to the project's CONTRIBUTING.md file (if found).\n"
        "- contribution_guidelines_summary_ai: An AI-generated summary of the key points from CONTRIBUTING.md.\n"
        "- repository_structure_modal_ai: A top-level file/folder listing from a repository clone (via Modal), followed by AI suggestions for relevant files based on the issue."
    )
    system_prompt = (
        "You are an expert onboarding assistant for open-source contributors. Your task is to intelligently plan "
        "the components of an onboarding kit that would be most helpful for a developer tackling a specific GitHub issue. "
        "You must respond ONLY with a valid JSON object containing a single key 'include_components' whose value is a list of strings, "
        "where each string is one of the component names provided."
    )
    user_prompt = (
        f"Based on the following GitHub issue details for a project searched under the language context '{language_searched}':\n"
        f"Issue Title: \"{issue_title}\"\n"
        f"Issue Snippet: \"{issue_snippet}\"\n"
        f"Issue Labels: {issue_labels}\n\n"
        f"And considering the following available onboarding kit components and their descriptions:\n"
        f"{components_description}\n\n"
        f"Which components should be included in the onboarding kit for this specific issue to be most helpful? "
        f"For example, if the issue is a very simple documentation typo, a full 'repository_structure_modal_ai' might be overkill. "
        f"If a project has no contribution guidelines, 'contribution_guidelines_summary_ai' would not be applicable (you cannot verify this from the data above; just keep it in mind). "
        f"Prioritize helpfulness for a beginner. Respond ONLY with a JSON object in the format: "
        f"{{\"include_components\": [\"component_name_1\", \"component_name_2\", ...]}}"
    )
print(f"LLM Handler (plan_kit): Sending request to plan kit components. Model: {model_name}")
try:
# Forcing JSON response mode if available and model supports it well
# gpt-4o-mini and newer gpt-3.5-turbo models usually handle "Respond ONLY with a valid JSON" well.
# For stronger enforcement, you can use response_format={"type": "json_object"} with compatible models.
completion_params = {
"model": model_name,
"messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
"temperature": 0.2, # Low temperature for more deterministic structural output
"max_tokens": 200, # JSON output should be relatively small
"top_p": 1.0,
}
# Check if the model might be one that supports explicit JSON mode via response_format
if "gpt-4o" in model_name or "gpt-3.5-turbo-0125" in model_name or "gpt-3.5-turbo-1106" in model_name: # Add other compatible models if known
completion_params["response_format"] = {"type": "json_object"}
completion = client.chat.completions.create(**completion_params)
raw_response_content = completion.choices[0].message.content
print(f"LLM Handler (plan_kit): Raw JSON response received: {raw_response_content}")
# Attempt to parse the JSON
parsed_plan = json.loads(raw_response_content)
if "include_components" in parsed_plan and isinstance(parsed_plan["include_components"], list):
# Further validation: ensure all component names are valid (optional but good)
valid_components = [comp for comp in parsed_plan["include_components"] if comp in available_components]
if len(valid_components) != len(parsed_plan["include_components"]):
print("Warning (llm_handler.plan_kit): LLM returned some invalid component names.")
final_plan = {"include_components": valid_components}
print(f"LLM Handler (plan_kit): Parsed plan: {final_plan}")
return final_plan
else:
print("ERROR (llm_handler.plan_kit): LLM response was not in the expected JSON format (missing 'include_components' list).")
return {"error": "LLM response format error", "details": "Missing 'include_components' list."}
except json.JSONDecodeError as json_e:
print(f"ERROR (llm_handler.plan_kit): Failed to decode JSON from LLM response. Error: {json_e}. Response was: {raw_response_content}")
return {"error": "JSON decode error", "details": str(json_e), "raw_response": raw_response_content}
except Exception as e:
print(f"ERROR (llm_handler.plan_kit): LLM API call failed: {e}")
return {"error": f"LLM API call failed: {str(e)}"}