Load tokenizer from parent model & app.py fixes

Files changed:
- app.py (+9 -8)
- detect-pretrain-code-contamination/src/run.py (+4 -9)
app.py (CHANGED)
@@ -6,6 +6,8 @@ import time
 import pandas as pd
 from threading import Thread
 import numpy as np
+import discord
+from discord.ext import commands
 
 # Add the path to the "src" directory of detect-pretrain-code-contamination to the sys.path
 project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "detect-pretrain-code-contamination"))
@@ -52,6 +54,9 @@ def save_to_txt(model, results, model_type,ref_model):
 
     with open(file_path, "a") as f:
         f.write(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])) + f",{ref_model}")
+
+        print(f"Finished evaluation of model: {model} using ref_model: {ref_model}")
+        print(f"\n{model_type},{model}," + str(formatr(results["arc"])) + "," + str(formatr(results["hellaswag"])) + "," + str(formatr(results["mmlu"])) + "," + str(formatr(results["truthfulQA"])) + "," + str(formatr(results["winogrande"])) + "," + str(formatr(results["gsm8k"])) + f",{ref_model}")
     f.close()
 
 def run_test(model,ref_model,data):
@@ -88,7 +93,9 @@ def worker_thread():
     for submission in modelQueue:
         #evaluate(submission[1],submission[0].split(" ")[0],submission[2])
         #modelQueue.pop(modelQueue.index(submission))
-
+        #exit()
+
+        #The exit above is temporal while I figure out how to unload a model from a thread or similar.
         # Uncomment those lines in order to begin testing, I test these models outside of this space and later commit the results back.
         # I highly encourage you to try to reproduce the results I get using your own implementation.
         # Do NOT take anything listed here as fact, as I'm not 100% my implementation works as intended.
@@ -105,16 +112,10 @@ def queue(model,model_type,ref_model):
     file_path = "data/queue.csv"
     with open(file_path, "a") as f:
         model = model.strip()
+        ref_model = ref_model.strip()
         f.write(f"\n{model_type},{model},{ref_model}")
     f.close()
     print(f"QUEUE:\n{modelQueue}")
-
-    eval_entry = {
-        "model": model,
-        "model_type": model_type,
-        "ref_model": ref_model,
-    }
-
 
 ### bigcode/bigcode-models-leaderboard
 def add_new_eval(
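For reference, save_to_txt appends one CSV row per evaluated model in the column order model_type, model, arc, hellaswag, mmlu, truthfulQA, winogrande, gsm8k, ref_model, and the new print statements echo the same row to the Space logs; the new discord / discord.ext imports are not yet used in the hunks shown here. The sketch below is illustrative only and not part of the commit: it rebuilds the same row with a join for readability. The helper name format_result_row is hypothetical; the column order comes from the diff above, and formatr stands in for the formatting helper app.py already uses.

# Illustrative sketch, not part of the commit: builds the CSV row that
# save_to_txt appends, using the column order shown in the diff above.
def format_result_row(model, model_type, ref_model, results, formatr=str):
    # formatr defaults to str here; app.py passes its own formatting helper.
    keys = ("arc", "hellaswag", "mmlu", "truthfulQA", "winogrande", "gsm8k")
    scores = [str(formatr(results[k])) for k in keys]
    return "\n" + ",".join([model_type, model, *scores, ref_model])

# Hypothetical usage (scores are made-up values):
# row = format_result_row("org/some-model", "fine-tuned", "mistralai/Mistral-7B-v0.1",
#                         {"arc": 0.31, "hellaswag": 0.27, "mmlu": 0.24,
#                          "truthfulQA": 0.45, "winogrande": 0.22, "gsm8k": 0.30})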
detect-pretrain-code-contamination/src/run.py (CHANGED)
@@ -40,15 +40,11 @@ def load_data(filename):
 def unload_model(model,tokenizer):
     print("[X] Cannot unload model! Functionality not implemented!")
 
-def load_model(name1):
+def load_model(name1,ref_model):
     if name1 not in models:
         model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
         model1.eval()
-
-        tokenizer1 = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
-    else:
-        tokenizer1 = AutoTokenizer.from_pretrained(name1)
-
+        tokenizer1 = AutoTokenizer.from_pretrained(ref_model)
         tokenizer1.pad_token = tokenizer1.eos_token
         models[name1] = model1
         models[name1 + "_tokenizer"] = tokenizer1
@@ -124,7 +120,7 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
         neighbors_dls = load_data(f'saves/{ref_model_clean}/{data_name_clean}/neighbors_dls.txt')
     except:
         ### MODEL 2 likelihoods
-        model2, tokenizer2 = load_model(ref_model)
+        model2, tokenizer2 = load_model(ref_model,ref_model)
         inference2_pass = [] #0: p_ref, #1: all_prob_ref, #2: p_ref_likelihood
         for ex in tqdm(test_data):
             text = ex[col_name]
@@ -147,7 +143,7 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
     print("Saved ref data, exiting.")
 
     ### MODEL 1 likelihoods
-    model1, tokenizer1 = load_model(target_model)
+    model1, tokenizer1 = load_model(target_model,ref_model)
     inference1_pass = [] #0: p1, #1: all_prob, #2: p1_likelihood, #3: p_lower, #4: p_lower_likelihood
     for ex in tqdm(test_data):
         text = ex[col_name]
@@ -155,7 +151,6 @@ def evaluate_data(test_data, col_name, target_model, ref_model, ratio_gen, data_
         inference1_pass.append(new_ex)
 
     ### RIMA results
-    model1, tokenizer1 = load_model(target_model)
     counter = 0
     results = []
     for ex in tqdm(test_data):
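Putting the run.py hunks together: load_model now always loads the tokenizer from the reference ("parent") model passed in, instead of falling back to mistralai/Mistral-7B-v0.1 for uncached models, and the redundant reload before the RIMA step is dropped. A minimal sketch of the resulting function is below; the lines inside the if block mirror the diff, while the imports, the cache-reuse comment, and the return statement are assumptions about surrounding code the hunks do not show.

# Sketch reconstructed from the hunks above; the cache-reuse behaviour and the
# return statement are assumptions, the rest mirrors the diff.
from transformers import AutoModelForCausalLM, AutoTokenizer

models = {}  # module-level model/tokenizer cache, as in run.py

def load_model(name1, ref_model):
    if name1 not in models:
        model1 = AutoModelForCausalLM.from_pretrained(name1, return_dict=True, device_map='auto')
        model1.eval()
        # The change in this commit: the tokenizer now always comes from the
        # reference ("parent") model rather than a hard-coded Mistral fallback.
        tokenizer1 = AutoTokenizer.from_pretrained(ref_model)
        tokenizer1.pad_token = tokenizer1.eos_token
        models[name1] = model1
        models[name1 + "_tokenizer"] = tokenizer1
    # Assumed: reuse the cached pair on later calls.
    return models[name1], models[name1 + "_tokenizer"]

Callers in evaluate_data now pass two arguments, load_model(ref_model, ref_model) for the reference pass and load_model(target_model, ref_model) for the target pass, so both models end up sharing the parent model's tokenizer.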