# IFX-sandbox/data/z_old/recreate_relationships.py
# Uploaded by aliss77777 with huggingface_hub ("Upload folder using huggingface_hub").
# Commit 06cb2a3 (verified).
import pandas as pd
import os
from graph import graph # Using the existing graph connection
from pathlib import Path
def recreate_fan_community_relationships():
    """
    Recreates the MEMBER_OF relationships between Fans and Communities
    using the existing CSV file and graph connection.

    The relationship CSV is read for its ``start_id`` (fan id) and ``end_id``
    (community id) columns; the communities CSV supplies the
    ``community_id`` -> ``Fan Chapter Name`` mapping used to translate each
    community id into the ``fan_chapter_name`` property that Community nodes
    are matched on. After printing some diagnostics and asking for an
    interactive confirmation, relationships are MERGEd in batches.

    Returns:
        bool: True when the run reaches the final verification query;
        False when a CSV file is missing, the user declines to proceed,
        or any exception is raised along the way.
    """
    # NOTE(review): paths are resolved relative to this script's own
    # directory -- confirm they are still valid for where the script lives.
    base_dir = Path(__file__).parent
    relationships_file = base_dir / "data" / "relationship_csvs" / "fan_community_rels.csv"
    communities_file = base_dir / "data" / "niners_output" / "fan_communities.csv"

    missing_files = [
        path for path in (relationships_file, communities_file) if not path.exists()
    ]
    if missing_files:
        print("Error: Could not find required CSV files")
        for path in missing_files:
            print(f"  Missing: {path}")
        return False

    try:
        # First, let's check if we can find any nodes
        fan_check = graph.query("""
            MATCH (f:Fan)
            RETURN count(f) as fan_count,
            collect(distinct f.fan_id)[0..5] as sample_ids
        """)
        print(f"\nFan check results: {fan_check}")

        community_check = graph.query("""
            MATCH (c:Community)
            RETURN count(c) as community_count,
            collect(distinct c.fan_chapter_name)[0..5] as sample_names
        """)
        print(f"\nCommunity check results: {community_check}")

        # Dump one community node so its property names can be eyeballed
        community_structure = graph.query("""
            MATCH (c:Community)
            RETURN c LIMIT 1
        """)
        print("\nCommunity node structure:")
        print(community_structure)

        # Read both CSVs
        rels_df = pd.read_csv(relationships_file)
        communities_df = pd.read_csv(communities_file)

        # Create UUID to fan_chapter_name mapping
        uuid_to_name = dict(
            zip(communities_df['community_id'], communities_df['Fan Chapter Name'])
        )
        print(f"Found {len(uuid_to_name)} community mappings")
        print("Sample mappings:")
        for uuid, name in list(uuid_to_name.items())[:3]:
            print(f"{uuid} -> {name}")

        proceed = input("\nDo you want to proceed with creating relationships? (y/n): ")
        if proceed.lower() != 'y':
            print("Aborting operation.")
            return False

        # The query text is loop-invariant, so build it once.
        merge_query = """
            UNWIND $rows AS row
            MATCH (f:Fan {fan_id: row.fan_id})
            MATCH (c:Community {fan_chapter_name: row.chapter_name})
            MERGE (f)-[:MEMBER_OF]->(c)
            RETURN count(*) as created
        """

        # Create relationships in batches
        batch_size = 100
        total_rows = len(rels_df)
        total_created = 0
        total_unmapped = 0
        for start in range(0, total_rows, batch_size):
            batch = rels_df.iloc[start:start + batch_size]

            # Convert UUIDs to fan_chapter_names
            rows = []
            for fan_id, community_id in zip(batch['start_id'], batch['end_id']):
                community_name = uuid_to_name.get(community_id)
                # pd.isna covers both a missing mapping (None) and a blank
                # chapter-name cell (NaN, which is truthy and would otherwise
                # be sent to the database as a name).
                if pd.isna(community_name) or not community_name:
                    total_unmapped += 1
                    continue
                rows.append({'fan_id': fan_id, 'chapter_name': community_name})

            if not rows:
                continue

            result = graph.query(merge_query, {'rows': rows})
            # Count what the database actually matched and merged rather than
            # what was submitted: rows whose Fan or Community node does not
            # exist produce no relationship and must not be reported.
            total_created += result[0]['created'] if result else 0
            print(f"Progress: Created {total_created}/{total_rows} relationships")

        if total_unmapped:
            print(f"Skipped {total_unmapped} rows with no matching community mapping")

        # Verify the relationships were created
        verification_query = """
            MATCH ()-[r:MEMBER_OF]->()
            RETURN count(r) as relationship_count
        """
        result = graph.query(verification_query)
        relationship_count = result[0]['relationship_count']
        print(f"\nVerification: Found {relationship_count} MEMBER_OF relationships in the database")
        return True
    except Exception as e:
        # Script-level boundary: report the failure and signal it via the
        # boolean return value the caller checks.
        print(f"Error occurred: {str(e)}")
        return False
if __name__ == "__main__":
    # Script entry point: run the recreation once and report the outcome.
    print("Starting to recreate Fan-Community relationships...")
    outcome_message = (
        "Successfully completed relationship recreation"
        if recreate_fan_community_relationships()
        else "Failed to recreate relationships"
    )
    print(outcome_message)