oliver-aizip kai-aizip committed on
Commit
f85a3ff
·
verified ·
1 Parent(s): dab8aab

update data loading (#1)

Browse files

- update data loading (4a6a9087b9c9112279a8d1d77794d2c80e2f4e70)


Co-authored-by: Kai <[email protected]>

Files changed (1) hide show
  1. utils/data_loader.py +14 -2
utils/data_loader.py CHANGED
@@ -5,11 +5,20 @@ import random
5
  import re
6
  from .context_processor import process_highlights
7
 
 
 
 
8
  def load_arena_data():
9
  """
10
  Loads the arena data from the arena_df.csv file in the utils directory.
11
  Returns the data in a format compatible with the application.
12
  """
 
 
 
 
 
 
13
  try:
14
  # Define the path to the CSV file
15
  csv_path = os.path.join('utils', 'arena_df.csv')
@@ -17,6 +26,9 @@ def load_arena_data():
17
  # Read the CSV file
18
  df = pd.read_csv(csv_path)
19
  print(f"Loaded arena data with {len(df)} examples")
 
 
 
20
  return df
21
  except Exception as e:
22
  print(f"Error loading arena data: {e}")
@@ -39,7 +51,7 @@ def get_random_example():
39
  Selects a random example from the loaded arena data.
40
  Returns the example data in a format compatible with the application.
41
  """
42
- # Load the arena data
43
  df = load_arena_data()
44
 
45
  if df.empty:
@@ -89,7 +101,7 @@ def get_random_example():
89
 
90
  if isinstance(example['contexts_highlighted'], str):
91
  try:
92
- # Try direct parsing, assuming it's a valid JSON array
93
  raw_str = example['contexts_highlighted']
94
 
95
  # First, manually parse the highlighted contexts using regex
 
5
  import re
6
  from .context_processor import process_highlights
7
 
8
+ # Global data store - populated on the first load_arena_data() call, then reused
9
+ _ARENA_DATA = None
10
+
11
  def load_arena_data():
12
  """
13
  Loads the arena data from the arena_df.csv file in the utils directory.
14
  Returns the data in a format compatible with the application.
15
  """
16
+ global _ARENA_DATA
17
+
18
+ # If data is already loaded, return it
19
+ if _ARENA_DATA is not None:
20
+ return _ARENA_DATA
21
+
22
  try:
23
  # Define the path to the CSV file
24
  csv_path = os.path.join('utils', 'arena_df.csv')
 
26
  # Read the CSV file
27
  df = pd.read_csv(csv_path)
28
  print(f"Loaded arena data with {len(df)} examples")
29
+
30
+ # Store the data globally
31
+ _ARENA_DATA = df
32
  return df
33
  except Exception as e:
34
  print(f"Error loading arena data: {e}")
 
51
  Selects a random example from the loaded arena data.
52
  Returns the example data in a format compatible with the application.
53
  """
54
+ # Get the globally stored data - won't reload from disk
55
  df = load_arena_data()
56
 
57
  if df.empty:
 
101
 
102
  if isinstance(example['contexts_highlighted'], str):
103
  try:
104
+ # Try direct JSON parsing first
105
  raw_str = example['contexts_highlighted']
106
 
107
  # First, manually parse the highlighted contexts using regex