# Page-header residue from the hosting site's file view, kept as a comment:
# Spaces: Runtime error. File size: 10,255 Bytes. Revision: 06cb2a3.
"""
Player Search - LangChain tool for retrieving player information from Neo4j
This module provides functions to:
1. Search for players in Neo4j based on natural language queries.
2. Generate text summaries about players.
3. Return both text summaries and structured data for UI components.
"""
# Import Gradio-specific modules directly
import sys
import os
# Add parent directory to path to access gradio modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gradio_llm import llm
from gradio_graph import graph
from langchain_neo4j import GraphCypherQAChain
from langchain_core.prompts import PromptTemplate
# Module-level cache holding the most recently retrieved player data.
# Workaround for LangChain dropping structured data: the structured record is
# stashed here so it can be read back with get_last_player_data().
# NOTE(review): this is one global per process, so every caller sees the same
# value — confirm that is acceptable if multiple sessions share the process.
LAST_PLAYER_DATA = None
# Function to get the cached player data
def get_last_player_data():
    """Return the value currently held in the module-level player cache.

    The cached value is logged to stdout before it is returned.

    Returns:
        Whatever ``LAST_PLAYER_DATA`` currently refers to (``None`` when the
        cache has been cleared).
    """
    # Reading a module global needs no ``global`` declaration.
    cached = LAST_PLAYER_DATA
    print(f"GETTING PLAYER DATA FROM CACHE: {cached}")
    return cached
# Function to set the cached player data
def set_last_player_data(player_data):
    """Overwrite the module-level player cache and log the stored value.

    Args:
        player_data: The value to cache. Pass ``None`` to clear the cache.

    Returns:
        None.
    """
    global LAST_PLAYER_DATA  # rebinding the module global requires the declaration
    LAST_PLAYER_DATA = player_data
    print(f"STORED PLAYER DATA IN CACHE: {player_data}")
# Clear the cache initially. This call runs at import time, so importing the
# module prints the "STORED PLAYER DATA IN CACHE: None" log line once.
set_last_player_data(None)
# Cypher generation prompt for player search.
# The template has two placeholders, {schema} and {question}; everything else
# is literal prompt text (rules plus four worked example queries).
# NOTE(review): the example RETURN clauses use capitalized property names
# (p.Name, p.Position, ...); presumably the returned column names become the
# result keys consumed by parse_player_data — confirm the casing matches the
# actual graph schema.
PLAYER_SEARCH_TEMPLATE = """
You are an expert Neo4j Developer translating user questions about NFL players into Cypher queries.
Your goal is to find a specific player or group of players in the database based on the user's description.
Convert the user's question based on the schema provided.
IMPORTANT NOTES:
1. Always return the FULL player node with ALL its relevant properties for display.
Specifically include: `player_id`, `Name`, `Position`, `Jersey_number`, `College`, `Height`, `Weight`, `Years_in_nfl`, `headshot_url`, `instagram_url`, `highlight_video_url`.
2. Always use case-insensitive comparisons using `toLower()` for string properties like Name, Position, College.
3. If searching by name, use CONTAINS for flexibility (e.g., `toLower(p.Name) CONTAINS toLower("bosa")`).
4. If searching by number, ensure the number property (`p.Jersey_number`) is matched correctly (it's likely stored as an integer or string, check schema).
5. NEVER use the embedding property.
6. Limit results to 1 if the user asks for a specific player, but allow multiple for general queries (e.g., "list all QBs"). Default to LIMIT 5 if multiple results are possible and no limit is specified.
Example Questions and Queries:
1. "Who is Nick Bosa?"
```
MATCH (p:Player)
WHERE toLower(p.Name) CONTAINS toLower("Nick Bosa")
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
LIMIT 1
```
2. "Tell me about player number 13"
```
MATCH (p:Player)
WHERE p.Jersey_number = 13 OR p.Jersey_number = "13" // Adapt based on schema type
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
LIMIT 1
```
3. "List all quarterbacks"
```
MATCH (p:Player)
WHERE toLower(p.Position) = toLower("QB")
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
ORDER BY p.Name
LIMIT 5
```
4. "Find players from Central Florida"
```
MATCH (p:Player)
WHERE toLower(p.College) CONTAINS toLower("Central Florida")
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
ORDER BY p.Name
LIMIT 5
```
Schema:
{schema}
Question:
{question}
"""
# Compiled prompt object passed to the Cypher QA chain as its cypher_prompt.
player_search_prompt = PromptTemplate.from_template(PLAYER_SEARCH_TEMPLATE)
# Player summary generation prompt.
# Placeholders: {Name}, {Position}, {Jersey_number}, {College}, {Years_in_nfl};
# generate_player_summary fills them from the parsed player dictionary.
PLAYER_SUMMARY_TEMPLATE = """
You are a helpful AI assistant providing information about an NFL player.
Based on the following data, write a concise 1-2 sentence summary.
Focus on their name, position, and maybe college or experience.
Data:
- Name: {Name}
- Position: {Position}
- Number: {Jersey_number}
- College: {College}
- Experience (Years): {Years_in_nfl}
Write the summary:
"""
# Compiled prompt object; formatted with keyword arguments named after the placeholders.
player_summary_prompt = PromptTemplate.from_template(PLAYER_SUMMARY_TEMPLATE)
# Cypher QA chain for player search, built from the shared `llm` and `graph`
# objects and the player-specific Cypher generation prompt defined above it.
player_search_chain = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    verbose=True,
    cypher_prompt=player_search_prompt,
    return_direct=True, # Return raw results; player_search_qa reads them from the 'result' key
    # NOTE(review): this flag presumably acknowledges that LLM-generated Cypher
    # is executed against the database — confirm the Neo4j credentials are
    # scoped appropriately (e.g. read-only).
    allow_dangerous_requests=True
)
# Function to parse player data from Cypher result
def parse_player_data(result):
    """Parse the first row of a Cypher result into a structured player dictionary.

    Only ``result[0]`` is used; any further rows are ignored. Result keys are
    expected in the ``p.<property>`` form and are matched case-insensitively,
    so both ``p.Name`` (the casing used by the example queries in the Cypher
    prompt) and ``p.name`` are accepted. When a row contains both casings, the
    exact lowercase key wins.

    Args:
        result: The raw rows returned by the Cypher chain, expected to be a
            non-empty list whose first element is a mapping of column name to
            value.

    Returns:
        A dict with any of the keys ``player_id``, ``Name``, ``Position``,
        ``Jersey_number``, ``College``, ``Height``, ``Weight``,
        ``Years_in_nfl``, ``headshot_url``, ``instagram_url`` and
        ``highlight_video_url`` that were present in the row, or ``None`` when
        the input is empty, is not a list, its first row is not a mapping, or
        the essential ``Name`` / ``player_id`` columns are missing.
    """
    from collections.abc import Mapping

    # ``not result`` already covers None and the empty list.
    if not result or not isinstance(result, list):
        print("Parsing player data: No result found.")
        return None

    # Assuming the query returns one player row or we take the first if multiple
    player = result[0]
    print(f"Parsing player data: Raw result item: {player}")

    # A non-mapping row cannot be looked up by column name; report it as
    # unparseable instead of raising from the key checks below.
    if not isinstance(player, Mapping):
        print("Parsing player data: Result item is not a mapping; cannot extract player fields.")
        return None

    key_map = {
        # Key from Cypher result (lowercased) : Key for output dictionary
        'p.player_id': 'player_id',
        'p.name': 'Name',
        'p.position': 'Position',
        'p.jersey_number': 'Jersey_number',
        'p.college': 'College',
        'p.height': 'Height',
        'p.weight': 'Weight',
        'p.years_in_nfl': 'Years_in_nfl',
        'p.headshot_url': 'headshot_url',
        'p.instagram_url': 'instagram_url',
        'p.highlight_video_url': 'highlight_video_url',
    }

    # Lowercased column name -> actual column name, so that 'p.Name' and
    # 'p.name' both resolve. setdefault keeps the first spelling seen.
    by_lowercase = {}
    for actual_key in player:
        if isinstance(actual_key, str):
            by_lowercase.setdefault(actual_key.lower(), actual_key)

    parsed_data = {}
    for cypher_key, dict_key in key_map.items():
        # Prefer an exact match; fall back to the case-insensitive one.
        source_key = cypher_key if cypher_key in player else by_lowercase.get(cypher_key)
        if source_key is not None:
            parsed_data[dict_key] = player[source_key]

    # Ensure essential keys were successfully mapped
    if 'Name' not in parsed_data or 'player_id' not in parsed_data:
        print("Parsing player data: Essential keys ('Name', 'player_id') were not successfully mapped from result.")
        print(f"Available keys in result: {list(player.keys())}")
        return None

    print(f"Parsing player data: Parsed dictionary: {parsed_data}")
    return parsed_data
# Function to generate a player summary using LLM
def generate_player_summary(player_data):
    """Generate a natural language summary of the player using the LLM.

    The summary prompt is filled from ``player_data`` and sent to the LLM.
    Fields that are missing, or present with a ``None`` value, are rendered
    as ``'N/A'``; other falsy values such as ``0`` are passed through as-is.

    Args:
        player_data: Parsed player dictionary (keys such as ``Name``,
            ``Position``, ``Jersey_number``, ``College``, ``Years_in_nfl``).

    Returns:
        The LLM's summary text. If ``player_data`` is empty or ``None`` a
        fixed "couldn't retrieve" message is returned; if prompt formatting
        or the LLM call fails, a minimal ``"Summary for <name>."`` fallback
        is returned instead of raising.
    """
    if not player_data:
        return "I couldn't retrieve enough information to summarize the player."

    def _field(key):
        # ``dict.get(key, default)`` only applies the default when the key is
        # absent; a key present with value None would otherwise be rendered
        # as the literal text "None" in the prompt.
        value = player_data.get(key)
        return 'N/A' if value is None else value

    try:
        # Format the prompt with player data, providing defaults
        formatted_prompt = player_summary_prompt.format(
            Name=_field('Name'),
            Position=_field('Position'),
            Jersey_number=_field('Jersey_number'),
            College=_field('College'),
            Years_in_nfl=_field('Years_in_nfl'),
        )
        # Generate the summary using the LLM
        summary = llm.invoke(formatted_prompt)
        # Some LLM wrappers return a message object with ``.content``; others
        # return something that is only meaningful via ``str()``.
        summary_content = summary.content if hasattr(summary, 'content') else str(summary)
        print(f"Generated Player Summary: {summary_content}")
        return summary_content
    except Exception as e:
        # Deliberate best-effort: a summary failure must not break the search.
        print(f"Error generating player summary: {str(e)}")
        name = player_data.get('Name')
        return f"Summary for {name if name is not None else 'this player'}."
# Main function to search for a player and generate output
def player_search_qa(input_text: str) -> dict:
    """
    Look up a player from a natural-language query and describe them.

    The query is run through the Cypher chain, the first returned row is
    parsed into a player dictionary, a short summary is generated for it,
    and the parsed dictionary is stored in the module-level cache.

    Args:
        input_text (str): Natural language query about a player.

    Returns:
        dict: ``{"output": <text>, "player_data": <dict or None>}``.
        ``player_data`` is ``None`` when nothing was found, the row could not
        be parsed, or an error occurred; ``output`` then carries a
        user-facing explanation.
    """
    # Clear cache at the start of each call
    set_last_player_data(None)

    def _reply(text, data=None):
        # Every exit path returns this same two-key shape.
        return {"output": text, "player_data": data}

    try:
        print(f"--- Processing Player Search Query: {input_text} ---")

        # Run the question through the Cypher chain.
        search_result = player_search_chain.invoke({"query": input_text})
        print(f"Raw search result from chain: {search_result}")

        # A usable result is a non-empty list under the 'result' key.
        rows = search_result.get('result') if search_result else None
        if not rows or not isinstance(rows, list):
            print("Player Search: No results found in Neo4j.")
            return _reply(
                "I couldn't find information about that player. Could you be more specific or try a different name/number?"
            )

        # Only the first row is turned into structured data.
        player_data = parse_player_data(rows)
        if not player_data:
            print("Player Search: Failed to parse data from Neo4j result.")
            return _reply(
                "I found some information, but couldn't process the player details correctly."
            )

        summary_text = generate_player_summary(player_data)

        # Keep the structured record available to the UI component.
        set_last_player_data(player_data)

        # player_data is also returned directly, for use if caching fails.
        final_output = _reply(summary_text, player_data)
        print(f"Final player_search_qa output: {final_output}")
        return final_output
    except Exception as e:
        print(f"Error in player_search_qa: {str(e)}")
        import traceback
        traceback.print_exc()
        # Clear cache on error
        set_last_player_data(None)
        return _reply(
            "I encountered an error while searching for the player. Please try again."
        )