Spaces:
Runtime error
Runtime error
"""
Player Search - LangChain tool for retrieving player information from Neo4j

This module provides functions to:
1. Search for players in Neo4j based on natural language queries.
2. Generate text summaries about players.
3. Return both text summaries and structured data for UI components.
"""
# Import Gradio-specific modules directly | |
import sys | |
import os | |
# Add parent directory to path to access gradio modules | |
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
from gradio_llm import llm | |
from gradio_graph import graph | |
from langchain_neo4j import GraphCypherQAChain | |
from langchain_core.prompts import PromptTemplate | |
# Module-level cache for the most recently retrieved player record.
# It exists as a workaround: the structured data is dropped on its way
# through LangChain, so consumers read it back from here instead.
LAST_PLAYER_DATA = None
def get_last_player_data():
    """Return whatever is currently stored in the module-level player cache.

    The cached value is also echoed to stdout for debugging.

    Returns:
        The object held in ``LAST_PLAYER_DATA`` (``None`` when the cache is
        empty).
    """
    cached = LAST_PLAYER_DATA
    print(f"GETTING PLAYER DATA FROM CACHE: {cached}")
    return cached
def set_last_player_data(player_data):
    """Overwrite the module-level player cache with ``player_data``.

    Passing ``None`` clears the cache. The stored value is echoed to stdout
    for debugging.

    Args:
        player_data: The player record to cache, or ``None`` to clear it.
    """
    global LAST_PLAYER_DATA
    LAST_PLAYER_DATA = player_data
    print(f"STORED PLAYER DATA IN CACHE: {player_data}")
# Start with an empty cache as soon as the module is imported.
set_last_player_data(None)
# Cypher-generation prompt for player search.
# `{schema}` and `{question}` are the only template variables; everything else
# is sent to the LLM verbatim, so the text must stay free of stray markup.
PLAYER_SEARCH_TEMPLATE = """
You are an expert Neo4j Developer translating user questions about NFL players into Cypher queries.
Your goal is to find a specific player or group of players in the database based on the user's description.
Convert the user's question based on the schema provided.
IMPORTANT NOTES:
1. Always return the FULL player node with ALL its relevant properties for display.
Specifically include: `player_id`, `Name`, `Position`, `Jersey_number`, `College`, `Height`, `Weight`, `Years_in_nfl`, `headshot_url`, `instagram_url`, `highlight_video_url`.
2. Always use case-insensitive comparisons using `toLower()` for string properties like Name, Position, College.
3. If searching by name, use CONTAINS for flexibility (e.g., `toLower(p.Name) CONTAINS toLower("bosa")`).
4. If searching by number, ensure the number property (`p.Jersey_number`) is matched correctly (it's likely stored as an integer or string, check schema).
5. NEVER use the embedding property.
6. Limit results to 1 if the user asks for a specific player, but allow multiple for general queries (e.g., "list all QBs"). Default to LIMIT 5 if multiple results are possible and no limit is specified.
Example Questions and Queries:
1. "Who is Nick Bosa?"
```
MATCH (p:Player)
WHERE toLower(p.Name) CONTAINS toLower("Nick Bosa")
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
LIMIT 1
```
2. "Tell me about player number 13"
```
MATCH (p:Player)
WHERE p.Jersey_number = 13 OR p.Jersey_number = "13" // Adapt based on schema type
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
LIMIT 1
```
3. "List all quarterbacks"
```
MATCH (p:Player)
WHERE toLower(p.Position) = toLower("QB")
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
ORDER BY p.Name
LIMIT 5
```
4. "Find players from Central Florida"
```
MATCH (p:Player)
WHERE toLower(p.College) CONTAINS toLower("Central Florida")
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
ORDER BY p.Name
LIMIT 5
```
Schema:
{schema}
Question:
{question}
"""

# Prompt object handed to the Cypher QA chain as its `cypher_prompt`.
player_search_prompt = PromptTemplate.from_template(PLAYER_SEARCH_TEMPLATE)
# Prompt used to turn one parsed player record into a short text summary.
# The placeholders ({Name}, {Position}, {Jersey_number}, {College},
# {Years_in_nfl}) are filled in by generate_player_summary(); the rest of the
# text is sent to the LLM verbatim.
PLAYER_SUMMARY_TEMPLATE = """
You are a helpful AI assistant providing information about an NFL player.
Based on the following data, write a concise 1-2 sentence summary.
Focus on their name, position, and maybe college or experience.
Data:
- Name: {Name}
- Position: {Position}
- Number: {Jersey_number}
- College: {College}
- Experience (Years): {Years_in_nfl}
Write the summary:
"""

player_summary_prompt = PromptTemplate.from_template(PLAYER_SUMMARY_TEMPLATE)
# Cypher QA chain used by player_search_qa(): the LLM writes a Cypher query
# from `player_search_prompt`, and the chain runs it against `graph`.
player_search_chain = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    verbose=True,
    cypher_prompt=player_search_prompt,
    # Hand back the raw query rows instead of an LLM-written answer; the rows
    # are parsed by parse_player_data().
    return_direct=True,
    # NOTE(review): explicit opt-in expected by the chain constructor, since
    # LLM-generated Cypher is executed against the database -- confirm the
    # database credentials used here are suitably scoped.
    allow_dangerous_requests=True,
)
def parse_player_data(result):
    """Parse the first row of a Cypher result into a structured player dict.

    Only ``p.``-prefixed columns are recognised. Column names are matched
    case-insensitively, so both ``p.Name`` (the spelling used by the example
    queries in the search prompt) and ``p.name`` are accepted; an exact
    lower-case match wins when both spellings are present.

    Args:
        result: The list of row dictionaries returned by the Cypher chain.
            Only the first row is used, even if several rows are present.

    Returns:
        dict | None: A dictionary keyed by the display names (``player_id``,
        ``Name``, ``Position``, ...) containing whichever columns were found,
        or ``None`` when ``result`` is empty / not a list, the first row is
        not a dict, or the essential ``Name`` / ``player_id`` columns are
        missing.
    """
    if not result or not isinstance(result, list):
        print("Parsing player data: No result found.")
        return None

    # Assuming the query returns one player row, or we take the first if multiple.
    player = result[0]
    print(f"Parsing player data: Raw result item: {player}")

    # A row that is not a mapping cannot be looked up by column name; bail out
    # instead of failing later on `.keys()`.
    if not isinstance(player, dict):
        print("Parsing player data: First result row is not a dictionary.")
        return None

    # Lower-cased Cypher column name -> key used in the output dictionary.
    key_map = {
        'p.player_id': 'player_id',
        'p.name': 'Name',
        'p.position': 'Position',
        'p.jersey_number': 'Jersey_number',
        'p.college': 'College',
        'p.height': 'Height',
        'p.weight': 'Weight',
        'p.years_in_nfl': 'Years_in_nfl',
        'p.headshot_url': 'headshot_url',
        'p.instagram_url': 'instagram_url',
        'p.highlight_video_url': 'highlight_video_url',
    }

    # Index the row's columns by lower-cased name so the capitalisation chosen
    # by the generated query does not matter. `setdefault` keeps the first
    # spelling encountered if two columns differ only by case.
    columns_by_lower = {}
    for column in player:
        if isinstance(column, str):
            columns_by_lower.setdefault(column.lower(), column)

    parsed_data = {}
    for cypher_key, dict_key in key_map.items():
        if cypher_key in player:
            # Exact (lower-case) match takes precedence.
            parsed_data[dict_key] = player[cypher_key]
        elif cypher_key in columns_by_lower:
            parsed_data[dict_key] = player[columns_by_lower[cypher_key]]

    # Ensure essential keys were successfully mapped.
    if 'Name' not in parsed_data or 'player_id' not in parsed_data:
        print("Parsing player data: Essential keys ('Name', 'player_id') were not successfully mapped from result.")
        print(f"Available keys in result: {list(player.keys())}")
        return None

    print(f"Parsing player data: Parsed dictionary: {parsed_data}")
    return parsed_data
def generate_player_summary(player_data):
    """Produce a short natural-language summary of a player via the LLM.

    Args:
        player_data: Parsed player dictionary. The keys ``Name``,
            ``Position``, ``Jersey_number``, ``College`` and ``Years_in_nfl``
            are read; any that are absent are sent to the prompt as ``'N/A'``.

    Returns:
        str: The LLM's summary text. If ``player_data`` is empty a fixed
        apology is returned; if formatting the prompt or invoking the LLM
        raises, a minimal "Summary for <name>." fallback is returned instead.
    """
    if not player_data:
        return "I couldn't retrieve enough information to summarize the player."

    prompt_fields = ('Name', 'Position', 'Jersey_number', 'College', 'Years_in_nfl')
    try:
        # Fill every prompt placeholder, defaulting missing values to 'N/A'.
        prompt_values = {field: player_data.get(field, 'N/A') for field in prompt_fields}
        formatted_prompt = player_summary_prompt.format(**prompt_values)

        response = llm.invoke(formatted_prompt)
        # Responses exposing `.content` are unwrapped; anything else is
        # stringified as-is.
        if hasattr(response, 'content'):
            summary_content = response.content
        else:
            summary_content = str(response)

        print(f"Generated Player Summary: {summary_content}")
        return summary_content
    except Exception as exc:
        # Best-effort: a failed summary must not break the search flow.
        print(f"Error generating player summary: {exc}")
        return f"Summary for {player_data.get('Name', 'this player')}."
def player_search_qa(input_text: str) -> dict:
    """Look up a player from a natural-language query and summarise the result.

    Runs the Cypher chain, parses the first returned row, asks the LLM for a
    short summary, and stores the parsed record in the module-level cache.

    Args:
        input_text (str): Natural language query about a player.

    Returns:
        dict: ``{"output": <text>, "player_data": <dict or None>}``.
        ``player_data`` is ``None`` whenever nothing was found, the row could
        not be parsed, or an exception occurred; ``output`` then carries a
        user-facing explanation.
    """
    def _response(text, data=None):
        # Every exit path returns the same two-key shape.
        return {"output": text, "player_data": data}

    # Drop whatever a previous call left in the cache before doing any work.
    set_last_player_data(None)

    try:
        print(f"--- Processing Player Search Query: {input_text} ---")

        search_result = player_search_chain.invoke({"query": input_text})
        print(f"Raw search result from chain: {search_result}")

        # The chain is expected to hand back a non-empty list under 'result'.
        rows = search_result.get('result') if search_result else None
        if not rows or not isinstance(rows, list):
            print("Player Search: No results found in Neo4j.")
            return _response(
                "I couldn't find information about that player. Could you be more specific or try a different name/number?"
            )

        player_data = parse_player_data(rows)
        if not player_data:
            print("Player Search: Failed to parse data from Neo4j result.")
            return _response(
                "I found some information, but couldn't process the player details correctly."
            )

        summary_text = generate_player_summary(player_data)

        # Cache the structured record for the UI component; it is also
        # returned directly in case the cache is not consulted.
        set_last_player_data(player_data)

        final_output = _response(summary_text, player_data)
        print(f"Final player_search_qa output: {final_output}")
        return final_output
    except Exception as exc:
        print(f"Error in player_search_qa: {exc}")
        import traceback
        traceback.print_exc()
        # Make sure a failed call never leaves a stale record behind.
        set_last_player_data(None)
        return _response(
            "I encountered an error while searching for the player. Please try again."
        )