# File size: 4,110 Bytes
# Revision: 06cb2a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import pandas as pd
import os
from graph import graph  # Using the existing graph connection
from pathlib import Path

def _resolve_membership_rows(batch, uuid_to_name):
    """Translate one slice of the relationships CSV into query parameters.

    Args:
        batch: DataFrame slice with ``start_id`` (fan id) and ``end_id``
            (community UUID) columns.
        uuid_to_name: Mapping from community UUID to fan chapter name.

    Returns:
        A list of ``{'fan_id': ..., 'chapter_name': ...}`` dicts, one for each
        row whose ``end_id`` resolves to a usable chapter name.
    """
    rows = []
    # ``tolist()`` yields plain Python scalars instead of NumPy scalars, which
    # is presumably safer to pass as query parameters.
    fan_ids = batch['start_id'].tolist()
    community_ids = batch['end_id'].tolist()
    for fan_id, community_id in zip(fan_ids, community_ids):
        chapter_name = uuid_to_name.get(community_id)
        # Skip unknown UUIDs and empty names. NaN (a blank CSV cell) is truthy,
        # so it needs its own check or it would be sent to the database.
        if not chapter_name or pd.isna(chapter_name):
            continue
        rows.append({'fan_id': fan_id, 'chapter_name': chapter_name})
    return rows


def recreate_fan_community_relationships():
    """Recreate the MEMBER_OF relationships between Fans and Communities.

    Reads ``data/relationship_csvs/fan_community_rels.csv`` (fan id ->
    community UUID) and ``data/niners_output/fan_communities.csv`` (community
    UUID -> "Fan Chapter Name"), both relative to this file's directory, then
    merges ``(:Fan)-[:MEMBER_OF]->(:Community)`` relationships through the
    shared ``graph`` connection in batches.

    The function prints diagnostics about the existing nodes and asks for
    confirmation on stdin before writing anything.

    Returns:
        True when the batches ran and the verification query completed;
        False when a CSV file is missing, the user declines, or any
        exception is raised along the way (the error is printed, not raised).
    """
    base_dir = Path(__file__).parent
    relationships_file = base_dir / "data" / "relationship_csvs" / "fan_community_rels.csv"
    communities_file = base_dir / "data" / "niners_output" / "fan_communities.csv"

    missing_files = [
        path for path in (relationships_file, communities_file) if not path.exists()
    ]
    if missing_files:
        print("Error: Could not find required CSV files")
        # Name the missing paths so the operator does not have to guess.
        for path in missing_files:
            print(f"  missing: {path}")
        return False

    try:
        # First, let's check if we can find any nodes
        fan_check = graph.query("""
            MATCH (f:Fan) 
            RETURN count(f) as fan_count, 
                   collect(distinct f.fan_id)[0..5] as sample_ids
        """)
        print(f"\nFan check results: {fan_check}")

        community_check = graph.query("""
            MATCH (c:Community) 
            RETURN count(c) as community_count,
                   collect(distinct c.fan_chapter_name)[0..5] as sample_names
        """)
        print(f"\nCommunity check results: {community_check}")

        # Dump one community node so its property names can be eyeballed.
        community_structure = graph.query("""
            MATCH (c:Community) 
            RETURN c LIMIT 1
        """)
        print("\nCommunity node structure:")
        print(community_structure)

        # Read both CSVs
        rels_df = pd.read_csv(relationships_file)
        communities_df = pd.read_csv(communities_file)

        # The relationships CSV refers to communities by UUID, but the graph
        # nodes are matched on fan_chapter_name, so build a lookup table.
        uuid_to_name = dict(zip(communities_df['community_id'], communities_df['Fan Chapter Name']))

        print(f"Found {len(uuid_to_name)} community mappings")
        print("Sample mappings:")
        for uuid, name in list(uuid_to_name.items())[:3]:
            print(f"{uuid} -> {name}")

        proceed = input("\nDo you want to proceed with creating relationships? (y/n): ")
        if proceed.lower() != 'y':
            print("Aborting operation.")
            return False

        # The query text is identical for every batch, so build it once.
        merge_query = """
        UNWIND $rows AS row
        MATCH (f:Fan {fan_id: row.fan_id})
        MATCH (c:Community {fan_chapter_name: row.chapter_name})
        MERGE (f)-[:MEMBER_OF]->(c)
        RETURN count(*) as created
        """

        batch_size = 100
        total_rows = len(rels_df)
        total_created = 0    # relationships the database reports as merged
        total_submitted = 0  # rows actually sent to the database
        total_skipped = 0    # rows dropped because no chapter name resolved

        for offset in range(0, total_rows, batch_size):
            batch = rels_df.iloc[offset:offset + batch_size]
            rows = _resolve_membership_rows(batch, uuid_to_name)
            total_skipped += len(batch) - len(rows)
            if not rows:
                continue

            result = graph.query(merge_query, {'rows': rows})
            # Count what the database actually merged rather than what was
            # submitted: rows whose Fan or Community node does not exist
            # produce no relationship. Assumes graph.query returns a list of
            # dict-like records, as the verification step below also does.
            total_created += result[0]['created'] if result else 0
            total_submitted += len(rows)
            print(f"Progress: Created {total_created}/{total_rows} relationships")

        print(
            f"Summary: {total_created} relationships merged from "
            f"{total_submitted} submitted rows; {total_skipped} rows skipped "
            f"(no community mapping)"
        )

        # Verify the relationships were created
        verification_query = """
        MATCH ()-[r:MEMBER_OF]->()
        RETURN count(r) as relationship_count
        """
        result = graph.query(verification_query)
        relationship_count = result[0]['relationship_count']

        print(f"\nVerification: Found {relationship_count} MEMBER_OF relationships in the database")
        return True

    except Exception as e:
        # Script-level boundary: report the failure and signal it via the
        # return value instead of letting the traceback escape.
        print(f"Error occurred: {str(e)}")
        return False

if __name__ == "__main__":
    # Script entry point: run the recreation and report the overall outcome.
    print("Starting to recreate Fan-Community relationships...")
    outcome_message = (
        "Successfully completed relationship recreation"
        if recreate_fan_community_relationships()
        else "Failed to recreate relationships"
    )
    print(outcome_message)