File size: 10,255 Bytes
06cb2a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
"""
Player Search - LangChain tool for retrieving player information from Neo4j

This module provides functions to:
1. Search for players in Neo4j based on natural language queries.
2. Generate text summaries about players.
3. Return both text summaries and structured data for UI components.
"""

# Import Gradio-specific modules directly
import sys
import os
# Add parent directory to path to access gradio modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gradio_llm import llm
from gradio_graph import graph
from langchain_neo4j import GraphCypherQAChain
from langchain_core.prompts import PromptTemplate

# Module-level cache holding the structured data of the most recently found player.
# Workaround for LangChain dropping structured data: player_search_qa stores the
# parsed player here and the UI layer reads it back via get_last_player_data().
# NOTE(review): this is a single slot shared by every caller of this module —
# presumably fine for a single-user session; confirm before serving concurrent users.
LAST_PLAYER_DATA = None

def get_last_player_data():
    """Return the current contents of the module-level player cache.

    The cached value is also printed as a debug trace.

    Returns:
        Whatever is currently stored in ``LAST_PLAYER_DATA`` (``None`` when
        the cache is empty).
    """
    # Reading a module-level name does not require a `global` declaration.
    print(f"GETTING PLAYER DATA FROM CACHE: {LAST_PLAYER_DATA}")
    return LAST_PLAYER_DATA

# Store (or clear) the cached player data
def set_last_player_data(player_data):
    """Replace the module-level player cache with ``player_data``.

    Args:
        player_data: The value to cache; pass ``None`` to clear the cache.

    The stored value is also printed as a debug trace.
    """
    global LAST_PLAYER_DATA
    LAST_PLAYER_DATA = player_data
    print(f"STORED PLAYER DATA IN CACHE: {player_data}")

# Reset the cache when the module is imported.
# NOTE: because the setter logs, this prints a "STORED PLAYER DATA IN CACHE: None"
# line at import time.
set_last_player_data(None)

# Cypher-generation prompt for player search.
# `{schema}` and `{question}` are the template's placeholders; everything else is
# sent to the LLM verbatim, including the four worked example queries.
# NOTE(review): the example queries return mixed-case columns (e.g. `p.Name`);
# any consumer of the raw rows must account for the column casing the database
# actually produces — confirm against the live schema.
PLAYER_SEARCH_TEMPLATE = """
You are an expert Neo4j Developer translating user questions about NFL players into Cypher queries.
Your goal is to find a specific player or group of players in the database based on the user's description.

Convert the user's question based on the schema provided.

IMPORTANT NOTES:
1. Always return the FULL player node with ALL its relevant properties for display.
   Specifically include: `player_id`, `Name`, `Position`, `Jersey_number`, `College`, `Height`, `Weight`, `Years_in_nfl`, `headshot_url`, `instagram_url`, `highlight_video_url`.
2. Always use case-insensitive comparisons using `toLower()` for string properties like Name, Position, College.
3. If searching by name, use CONTAINS for flexibility (e.g., `toLower(p.Name) CONTAINS toLower("bosa")`).
4. If searching by number, ensure the number property (`p.Jersey_number`) is matched correctly (it's likely stored as an integer or string, check schema).
5. NEVER use the embedding property.
6. Limit results to 1 if the user asks for a specific player, but allow multiple for general queries (e.g., "list all QBs"). Default to LIMIT 5 if multiple results are possible and no limit is specified.

Example Questions and Queries:

1. "Who is Nick Bosa?"
```
MATCH (p:Player)
WHERE toLower(p.Name) CONTAINS toLower("Nick Bosa")
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
LIMIT 1
```

2. "Tell me about player number 13"
```
MATCH (p:Player)
WHERE p.Jersey_number = 13 OR p.Jersey_number = "13" // Adapt based on schema type
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
LIMIT 1
```

3. "List all quarterbacks"
```
MATCH (p:Player)
WHERE toLower(p.Position) = toLower("QB")
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
ORDER BY p.Name
LIMIT 5
```

4. "Find players from Central Florida"
```
MATCH (p:Player)
WHERE toLower(p.College) CONTAINS toLower("Central Florida")
RETURN p.player_id, p.Name, p.Position, p.Jersey_number, p.College, p.Height, p.Weight, p.Years_in_nfl, p.headshot_url, p.instagram_url, p.highlight_video_url
ORDER BY p.Name
LIMIT 5
```

Schema:
{schema}

Question:
{question}
"""

# Prompt object handed to the Cypher QA chain as its `cypher_prompt`.
player_search_prompt = PromptTemplate.from_template(PLAYER_SEARCH_TEMPLATE)

# Prompt used to turn one player's properties into a short text summary.
# The five placeholders (Name, Position, Jersey_number, College, Years_in_nfl)
# are filled in by generate_player_summary via player_summary_prompt.format(...).
PLAYER_SUMMARY_TEMPLATE = """
You are a helpful AI assistant providing information about an NFL player.
Based on the following data, write a concise 1-2 sentence summary.
Focus on their name, position, and maybe college or experience.

Data:
- Name: {Name}
- Position: {Position}
- Number: {Jersey_number}
- College: {College}
- Experience (Years): {Years_in_nfl}

Write the summary:
"""

player_summary_prompt = PromptTemplate.from_template(PLAYER_SUMMARY_TEMPLATE)

# Cypher QA chain for player search: built from the shared `llm` and `graph`
# objects, using player_search_prompt to generate the Cypher query.
# NOTE(review): `allow_dangerous_requests=True` is presumably the opt-in LangChain
# requires because the chain executes LLM-generated Cypher against the database —
# confirm the database credentials are scoped as narrowly as possible.
player_search_chain = GraphCypherQAChain.from_llm(
    llm,
    graph=graph,
    verbose=True,
    cypher_prompt=player_search_prompt,
    return_direct=True,  # Return raw results (player_search_qa reads them from the 'result' key)
    allow_dangerous_requests=True
)

# Parse a single player record out of the raw Cypher result rows
def parse_player_data(result):
    """Parse the first row of a Cypher result into a structured player dictionary.

    Column names are matched case-insensitively, so both ``p.Name`` (the form
    used by the example queries in the Cypher generation prompt) and ``p.name``
    are recognised. A row that carries the whole node as a nested mapping under
    ``p`` (the shape produced by ``RETURN p``) is accepted as well.

    Args:
        result: List of result rows, each a mapping of column name to value.
            Only the first row is used.

    Returns:
        dict | None: Player properties keyed by their display names
        (``player_id``, ``Name``, ``Position``, ...). ``None`` when ``result``
        is empty or not a list, when the first row is not a mapping, or when
        the ``Name`` or ``player_id`` column could not be found.
    """
    from collections.abc import Mapping

    if not isinstance(result, list) or not result:
        print("Parsing player data: No result found.")
        return None

    # Only the first row is considered, even if the query returned several
    player = result[0]
    print(f"Parsing player data: Raw result item: {player}")

    if not isinstance(player, Mapping):
        print("Parsing player data: Result item is not a mapping; cannot extract properties.")
        return None

    key_map = {
        # Lowercased key from Cypher result : Key for output dictionary
        'p.player_id': 'player_id',
        'p.name': 'Name',
        'p.position': 'Position',
        'p.jersey_number': 'Jersey_number',
        'p.college': 'College',
        'p.height': 'Height',
        'p.weight': 'Weight',
        'p.years_in_nfl': 'Years_in_nfl',
        'p.headshot_url': 'headshot_url',
        'p.instagram_url': 'instagram_url',
        'p.highlight_video_url': 'highlight_video_url',
    }

    # Case-insensitive view of the row. Flat columns are registered first so
    # they take priority over properties found inside a nested node mapping.
    lookup = {}
    for column, value in player.items():
        lookup.setdefault(str(column).lower(), value)
    for column, value in player.items():
        if isinstance(value, Mapping):
            for prop, prop_value in value.items():
                lookup.setdefault(f"{column}.{prop}".lower(), prop_value)

    parsed_data = {}
    for cypher_key, dict_key in key_map.items():
        if cypher_key in player:
            # An exact match always wins over a case-variant of the same column
            parsed_data[dict_key] = player[cypher_key]
        elif cypher_key in lookup:
            parsed_data[dict_key] = lookup[cypher_key]

    # Ensure essential keys were successfully mapped
    if 'Name' not in parsed_data or 'player_id' not in parsed_data:
        print("Parsing player data: Essential keys ('Name', 'player_id') were not successfully mapped from result.")
        print(f"Available keys in result: {list(player.keys())}")
        return None

    print(f"Parsing player data: Parsed dictionary: {parsed_data}")
    return parsed_data

# Generate a short natural-language summary of a player via the LLM
def generate_player_summary(player_data):
    """Generate a natural language summary of the player using the LLM.

    Properties that are absent *or* present with a ``None`` value are shown to
    the LLM as ``'N/A'`` so that a null database property never appears as the
    literal text "None" in the prompt or in the fallback message.

    Args:
        player_data: Dictionary of player properties. The keys read are
            ``Name``, ``Position``, ``Jersey_number``, ``College`` and
            ``Years_in_nfl``.

    Returns:
        str: The summary produced by the LLM. When ``player_data`` is empty a
        fixed "couldn't retrieve" message is returned; when prompt formatting
        or the LLM call fails, a minimal ``"Summary for <name>."`` fallback is
        returned instead of raising.
    """
    if not player_data:
        return "I couldn't retrieve enough information to summarize the player."

    def _field(key, default='N/A'):
        # A property can exist with a null value; treat that the same as missing.
        value = player_data.get(key)
        return default if value is None else value

    try:
        # Format the prompt with player data, providing defaults
        formatted_prompt = player_summary_prompt.format(
            Name=_field('Name'),
            Position=_field('Position'),
            Jersey_number=_field('Jersey_number'),
            College=_field('College'),
            Years_in_nfl=_field('Years_in_nfl'),
        )

        # Generate the summary using the LLM
        summary = llm.invoke(formatted_prompt)
        # Use the response's `content` attribute when it has one, else its string form
        summary_content = summary.content if hasattr(summary, 'content') else str(summary)
        print(f"Generated Player Summary: {summary_content}")
        return summary_content
    except Exception as e:
        # Best-effort: a failed summary must not break the whole search response
        print(f"Error generating player summary: {str(e)}")
        return f"Summary for {_field('Name', 'this player')}."

# Entry point: look up a player, summarise it, and return text plus structured data
def player_search_qa(input_text: str) -> dict:
    """
    Run a natural-language player lookup and build the tool response.

    The query is passed to the Cypher QA chain, the first returned row is
    parsed into a player dictionary, a text summary is generated for it, and
    the parsed player is stored in the module-level cache.

    Args:
        input_text (str): Natural language query about a player.

    Returns:
        dict: ``{"output": <text>, "player_data": <dict or None>}``.
        ``player_data`` is ``None`` when nothing was found, the row could not
        be parsed, or an error occurred; ``output`` then holds a user-facing
        explanation. Exceptions raised inside the lookup are caught and
        reported through ``output`` rather than propagated.
    """
    def _reply(text, data=None):
        # Single place that fixes the shape of every response
        return {"output": text, "player_data": data}

    # Start from an empty cache so a failed lookup never exposes a stale player
    set_last_player_data(None)

    try:
        print(f"--- Processing Player Search Query: {input_text} ---")

        # Ask the Cypher chain to translate and run the query
        search_result = player_search_chain.invoke({"query": input_text})
        print(f"Raw search result from chain: {search_result}")

        # Anything other than a non-empty list of rows counts as "not found"
        rows = search_result.get('result') if search_result else None
        if not rows or not isinstance(rows, list):
            print("Player Search: No results found in Neo4j.")
            return _reply(
                "I couldn't find information about that player. Could you be more specific or try a different name/number?"
            )

        parsed = parse_player_data(rows)
        if not parsed:
            print("Player Search: Failed to parse data from Neo4j result.")
            return _reply(
                "I found some information, but couldn't process the player details correctly."
            )

        summary_text = generate_player_summary(parsed)

        # Make the structured data available to the UI component
        set_last_player_data(parsed)

        # The data is also returned directly, for potential use if caching fails
        final_output = _reply(summary_text, parsed)
        print(f"Final player_search_qa output: {final_output}")
        return final_output

    except Exception as e:
        print(f"Error in player_search_qa: {str(e)}")
        import traceback
        traceback.print_exc()
        set_last_player_data(None)  # Clear cache on error
        return _reply(
            "I encountered an error while searching for the player. Please try again."
        )