Update app.py
app.py CHANGED
@@ -16,9 +16,15 @@ import spaces
 # Use dotenv to load the environment variables
 load_dotenv()
 
-# Get
+# Get environment variables
 HF_TOKEN = os.getenv("HF_TOKEN_TEXT")
-
+DATASET_NAME = os.getenv("DATASET_NAME", "QuotaClimat/frugalaichallenge-text-train")  # Default to public dataset
+TEST_SIZE = float(os.getenv("TEST_SIZE", "0.2"))  # Default to 20% test size
+TEST_SEED = int(os.getenv("TEST_SEED", "42"))  # Default seed for reproducibility
+
+print(f"Using dataset: {DATASET_NAME}")
+print(f"Test split size: {TEST_SIZE}")
+
 if not HF_TOKEN:
     print("Warning: HF_TOKEN not found in environment variables. Submissions will not work.")
 
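With this change the configuration comes entirely from environment variables with hard-coded fallbacks. As a minimal sketch (values are illustrative, not part of the commit), the defaults can be overridden without editing app.py, because load_dotenv() does not overwrite variables that already exist in the environment and os.getenv(name, default) only uses the default when the variable is unset:

import os

# Illustrative overrides, set before app.py runs (e.g. in a wrapper script,
# or equivalently as Space variables/secrets in the repository settings).
os.environ["DATASET_NAME"] = "QuotaClimat/frugalaichallenge-text-train"  # same as the default; any HF dataset id works
os.environ["TEST_SIZE"] = "0.3"   # hold out 30% instead of the default 20%
os.environ["TEST_SEED"] = "123"   # any fixed integer keeps the split reproducible
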
@@ -162,13 +168,13 @@ LABEL_MAPPING = {
 
 # Load and prepare the dataset
 print("Loading dataset...")
-dataset = load_dataset(
+dataset = load_dataset(DATASET_NAME)
 
 # Convert string labels to integers
 dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
 
 # Split dataset
-train_test = dataset["train"].train_test_split(test_size=TEST_SIZE, seed=TEST_SEED)
+train_test = dataset["train"].train_test_split(test_size=TEST_SIZE, seed=TEST_SEED)
 train_dataset = train_test["train"]
 test_dataset = train_test["test"]
 
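For context, a minimal, self-contained sketch of what the loading and splitting does with the new defaults filled in; it assumes the dataset exposes a "train" split (as the existing code already does), and the label-mapping step is omitted because LABEL_MAPPING is defined elsewhere in app.py and not shown in this hunk:

from datasets import load_dataset

# Same default dataset id as the environment-variable fallback above.
dataset = load_dataset("QuotaClimat/frugalaichallenge-text-train")

# An 80/20 split; passing a fixed seed makes the split identical on every run,
# so the held-out test rows stay stable across restarts of the Space.
train_test = dataset["train"].train_test_split(test_size=0.2, seed=42)
train_dataset = train_test["train"]
test_dataset = train_test["test"]
print(len(train_dataset), len(test_dataset))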