Spaces:

aliss77777
/

IFX-sandbox

Runtime error

App Files Files Community

IFX-sandbox / tools /player_search.py

aliss77777

Upload folder using huggingface_hub

06cb2a3 verified 4 months ago

raw

history blame contribute delete

10.3 kB

	"""
	Player Search - LangChain tool for retrieving player information from Neo4j

	This module provides functions to:
	1. Search for players in Neo4j based on natural language queries.
	2. Generate text summaries about players.
	3. Return both text summaries and structured data for UI components.
	"""

	# Import Gradio-specific modules directly
	import sys
	import os
	# Add parent directory to path to access gradio modules
	sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
	from gradio_llm import llm
	from gradio_graph import graph
	from langchain_neo4j import GraphCypherQAChain
	from langchain_core.prompts import PromptTemplate

	# Create a global variable to store the last retrieved player data
	# Workaround for LangChain dropping structured data
	LAST_PLAYER_DATA = None

	# Function to get the cached player data
	def get_last_player_data():
	global LAST_PLAYER_DATA
	print(f"GETTING PLAYER DATA FROM CACHE: {LAST_PLAYER_DATA}")
	return LAST_PLAYER_DATA

	# Function to set the cached player data
	def set_last_player_data(player_data):
	global LAST_PLAYER_DATA
	LAST_PLAYER_DATA = player_data
	print(f"STORED PLAYER DATA IN CACHE: {player_data}")

	# Clear the cache initially
	set_last_player_data(None)

	# Create the Cypher generation prompt for player search
	PLAYER_SEARCH_TEMPLATE = """
	You are an expert Neo4j Developer translating user questions about NFL players into Cypher queries.
	Your goal is to find a specific player or group of players in the database based on the user's description.

	Convert the user's question based on the schema provided.

	IMPORTANT NOTES:
	1. Always return the FULL player node with ALL its relevant properties for display.
	Specifically include: `player_id`, `Name`, `Position`, `Jersey_number`, `College`, `Height`, `Weight`, `Years_in_nfl`, `headshot_url`, `instagram_url`, `highlight_video_url`.
	2. Always use case-insensitive comparisons using `toLower()` for string properties like Name, Position, College.
	3. If searching by name, use CONTAINS for flexibility (e.g., `toLower(p.Name) CONTAINS toLower("bosa")`).
	4. If searching by number, ensure the number property (`p.Jersey_number`) is matched correctly (it's likely stored as an integer or string, check schema).
	5. NEVER use the embedding property.
	6. Limit results to 1 if the user asks for a specific player, but allow multiple for general queries (e.g., "list all QBs"). Default to LIMIT 5 if multiple results are possible and no limit is specified.

	Example Questions and Queries:

	1. "Who is Nick Bosa?"
	```
	MATCH (p:Player)
	WHERE toLower(p.Name) CONTAINS toLower("Nick Bosa")
	RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
	LIMIT 1
	```

	2. "Tell me about player number 13"
	```
	MATCH (p:Player)
	WHERE p.Jersey_number = 13 OR p.Jersey_number = "13" // Adapt based on schema type
	RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
	LIMIT 1
	```

	3. "List all quarterbacks"
	```
	MATCH (p:Player)
	WHERE toLower(p.Position) = toLower("QB")
	RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
	ORDER BY p.Name
	LIMIT 5
	```

	4. "Find players from Central Florida"
	```
	MATCH (p:Player)
	WHERE toLower(p.College) CONTAINS toLower("Central Florida")
	RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
	ORDER BY p.Name
	LIMIT 5
	```

	Schema:
	{schema}

	Question:
	{question}
	"""

	player_search_prompt = PromptTemplate.from_template(PLAYER_SEARCH_TEMPLATE)

	# Create the player summary generation prompt
	PLAYER_SUMMARY_TEMPLATE = """
	You are a helpful AI assistant providing information about an NFL player.
	Based on the following data, write a concise 1-2 sentence summary.
	Focus on their name, position, and maybe college or experience.

	Data:
	- Name: {Name}
	- Position: {Position}
	- Number: {Jersey_number}
	- College: {College}
	- Experience (Years): {Years_in_nfl}

	Write the summary:
	"""

	player_summary_prompt = PromptTemplate.from_template(PLAYER_SUMMARY_TEMPLATE)

	# Create the Cypher QA chain for player search
	player_search_chain = GraphCypherQAChain.from_llm(
	llm,
	graph=graph,
	verbose=True,
	cypher_prompt=player_search_prompt,
	return_direct=True, # Return raw results
	allow_dangerous_requests=True
	)

	# Function to parse player data from Cypher result
	def parse_player_data(result):
	"""Parse the player data from the Cypher result into a structured dictionary."""
	if not result or not isinstance(result, list) or len(result) == 0:
	print("Parsing player data: No result found.")
	return None

	# Assuming the query returns one player row or we take the first if multiple
	player = result[0]
	print(f"Parsing player data: Raw result item: {player}")

	# Extract properties using the defined map, checking ONLY for the prefixed keys
	parsed_data = {}
	# Corrected key map to use lowercase property names matching Cypher output
	key_map = {
	# Key from Cypher result : Key for output dictionary
	'p.player_id': 'player_id',
	'p.name': 'Name', # Corrected case
	'p.position': 'Position', # Corrected case
	'p.jersey_number': 'Jersey_number', # Corrected case
	'p.college': 'College', # Corrected case
	'p.height': 'Height', # Corrected case
	'p.weight': 'Weight', # Corrected case
	'p.years_in_nfl': 'Years_in_nfl', # Corrected case
	'p.headshot_url': 'headshot_url',
	'p.instagram_url': 'instagram_url',
	'p.highlight_video_url': 'highlight_video_url'
	}

	for cypher_key, dict_key in key_map.items():
	if cypher_key in player:
	parsed_data[dict_key] = player[cypher_key]
	# else: # Optional: Log if a specific key wasn't found
	# print(f"Parsing player data: Key '{cypher_key}' not found in result.")

	# Ensure essential keys were successfully mapped
	if 'Name' not in parsed_data or 'player_id' not in parsed_data:
	print("Parsing player data: Essential keys ('Name', 'player_id') were not successfully mapped from result.")
	print(f"Available keys in result: {list(player.keys())}")
	return None

	print(f"Parsing player data: Parsed dictionary: {parsed_data}")
	return parsed_data

	# Function to generate a player summary using LLM
	def generate_player_summary(player_data):
	"""Generate a natural language summary of the player using the LLM."""
	if not player_data:
	return "I couldn't retrieve enough information to summarize the player."

	try:
	# Format the prompt with player data, providing defaults
	formatted_prompt = player_summary_prompt.format(
	Name=player_data.get('Name', 'N/A'),
	Position=player_data.get('Position', 'N/A'),
	Jersey_number=player_data.get('Jersey_number', 'N/A'),
	College=player_data.get('College', 'N/A'),
	Years_in_nfl=player_data.get('Years_in_nfl', 'N/A')
	)

	# Generate the summary using the LLM
	summary = llm.invoke(formatted_prompt)
	summary_content = summary.content if hasattr(summary, 'content') else str(summary)
	print(f"Generated Player Summary: {summary_content}")
	return summary_content
	except Exception as e:
	print(f"Error generating player summary: {str(e)}")
	return f"Summary for {player_data.get('Name', 'this player')}."

	# Main function to search for a player and generate output
	def player_search_qa(input_text: str) -> dict:
	"""
	Searches for a player based on input text, generates a summary, and returns data.

	Args:
	input_text (str): Natural language query about a player.

	Returns:
	dict: Response containing text summary and structured player data.
	"""
	global LAST_PLAYER_DATA
	set_last_player_data(None) # Clear cache at the start of each call

	try:
	# Log the incoming query
	print(f"--- Processing Player Search Query: {input_text} ---")

	# Search for the player using the Cypher chain
	search_result = player_search_chain.invoke({"query": input_text})
	print(f"Raw search result from chain: {search_result}")

	# Check if we have a result and it's not empty
	if not search_result or not search_result.get('result') or not isinstance(search_result['result'], list) or len(search_result['result']) == 0:
	print("Player Search: No results found in Neo4j.")
	return {
	"output": "I couldn't find information about that player. Could you be more specific or try a different name/number?",
	"player_data": None
	}

	# Parse the player data from the first result
	player_data = parse_player_data(search_result['result'])

	if not player_data:
	print("Player Search: Failed to parse data from Neo4j result.")
	return {
	"output": "I found some information, but couldn't process the player details correctly.",
	"player_data": None
	}

	# Generate the text summary
	summary_text = generate_player_summary(player_data)

	# Store the structured data in the cache for the UI component
	set_last_player_data(player_data)

	# Return both the text summary and the structured data
	final_output = {
	"output": summary_text,
	"player_data": player_data # Include for potential direct use if caching fails
	}
	print(f"Final player_search_qa output: {final_output}")
	return final_output

	except Exception as e:
	print(f"Error in player_search_qa: {str(e)}")
	import traceback
	traceback.print_exc()
	set_last_player_data(None) # Clear cache on error
	return {
	"output": "I encountered an error while searching for the player. Please try again.",
	"player_data": None
	}