Spaces:
Running
on
Zero
Running
on
Zero
update data loading (#1)
Browse files
- update data loading (4a6a9087b9c9112279a8d1d77794d2c80e2f4e70)
Co-authored-by: Kai <[email protected]>
- utils/data_loader.py +14 -2
utils/data_loader.py
CHANGED
@@ -5,11 +5,20 @@ import random
|
|
5 |
import re
|
6 |
from .context_processor import process_highlights
|
7 |
|
|
|
|
|
|
|
8 |
def load_arena_data():
|
9 |
"""
|
10 |
Loads the arena data from the arena_df.csv file in the utils directory.
|
11 |
Returns the data in a format compatible with the application.
|
12 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
try:
|
14 |
# Define the path to the CSV file
|
15 |
csv_path = os.path.join('utils', 'arena_df.csv')
|
@@ -17,6 +26,9 @@ def load_arena_data():
|
|
17 |
# Read the CSV file
|
18 |
df = pd.read_csv(csv_path)
|
19 |
print(f"Loaded arena data with {len(df)} examples")
|
|
|
|
|
|
|
20 |
return df
|
21 |
except Exception as e:
|
22 |
print(f"Error loading arena data: {e}")
|
@@ -39,7 +51,7 @@ def get_random_example():
|
|
39 |
Selects a random example from the loaded arena data.
|
40 |
Returns the example data in a format compatible with the application.
|
41 |
"""
|
42 |
-
#
|
43 |
df = load_arena_data()
|
44 |
|
45 |
if df.empty:
|
@@ -89,7 +101,7 @@ def get_random_example():
|
|
89 |
|
90 |
if isinstance(example['contexts_highlighted'], str):
|
91 |
try:
|
92 |
-
# Try direct parsing
|
93 |
raw_str = example['contexts_highlighted']
|
94 |
|
95 |
# First, manually parse the highlighted contexts using regex
|
|
|
5 |
import re
|
6 |
from .context_processor import process_highlights
|
7 |
|
8 |
+
# Global data store - loaded once at import time
|
9 |
+
_ARENA_DATA = None
|
10 |
+
|
11 |
def load_arena_data():
|
12 |
"""
|
13 |
Loads the arena data from the arena_df.csv file in the utils directory.
|
14 |
Returns the data in a format compatible with the application.
|
15 |
"""
|
16 |
+
global _ARENA_DATA
|
17 |
+
|
18 |
+
# If data is already loaded, return it
|
19 |
+
if _ARENA_DATA is not None:
|
20 |
+
return _ARENA_DATA
|
21 |
+
|
22 |
try:
|
23 |
# Define the path to the CSV file
|
24 |
csv_path = os.path.join('utils', 'arena_df.csv')
|
|
|
26 |
# Read the CSV file
|
27 |
df = pd.read_csv(csv_path)
|
28 |
print(f"Loaded arena data with {len(df)} examples")
|
29 |
+
|
30 |
+
# Store the data globally
|
31 |
+
_ARENA_DATA = df
|
32 |
return df
|
33 |
except Exception as e:
|
34 |
print(f"Error loading arena data: {e}")
|
|
|
51 |
Selects a random example from the loaded arena data.
|
52 |
Returns the example data in a format compatible with the application.
|
53 |
"""
|
54 |
+
# Get the globally stored data - won't reload from disk
|
55 |
df = load_arena_data()
|
56 |
|
57 |
if df.empty:
|
|
|
101 |
|
102 |
if isinstance(example['contexts_highlighted'], str):
|
103 |
try:
|
104 |
+
# Try direct JSON parsing first
|
105 |
raw_str = example['contexts_highlighted']
|
106 |
|
107 |
# First, manually parse the highlighted contexts using regex
|