Spaces:

tonneli
/

EureCA

Sleeping

App Files Files Community

EureCA / dsp /evaluation /utils.py

tonneli

Delete history

f5776d3 over 1 year ago

raw

history blame contribute delete

2.6 kB


	import dsp
	import tqdm
	import pandas as pd

	try:
	from IPython.display import display as ipython_display
	except ImportError:
	ipython_display = print
	from dsp.utils import EM, F1, HotPotF1


	def evaluateRetrieval(fn, dev, metric=None):
	    """Score a retrieval function over a dev set and display the results.

	    For every example, ``fn`` is called with ``example.question`` and the
	    ``context`` attribute of its result is compared with ``example.answer``
	    through ``dsp.passage_match``. A one-line summary is printed and a
	    left-aligned table of all examples, with a ✔️/❌ ``correct`` column,
	    is displayed.

	    Args:
	        fn: Callable taking a question and returning an object that has a
	            ``context`` attribute.
	        dev: Iterable of examples. Each example must expose ``question``
	            and ``answer`` attributes and be convertible via
	            ``dict(example)``.
	        metric: Not used by this function; accepted so that existing
	            callers passing it keep working.

	    Returns:
	        The percentage of examples marked correct, rounded to one decimal
	        place, or ``0.0`` when ``dev`` yields no examples.
	    """
	    rows = []

	    for example in tqdm.tqdm(dev):
	        prediction = fn(example.question)

	        row = dict(example)
	        # NOTE(review): presumably true when a retrieved passage contains the
	        # gold answer -- confirm against dsp.passage_match.
	        row['correct'] = dsp.passage_match(prediction.context, example.answer)
	        rows.append(row)

	    # Count the rows actually evaluated instead of calling len(dev), so
	    # iterables without __len__ (e.g. generators) are accepted as well.
	    total = len(rows)
	    if total == 0:
	        # A DataFrame built from an empty list has no 'correct' column, so
	        # the lookups below would raise KeyError; report and stop instead.
	        print("Answered 0 / 0 (0.0%) correctly.")
	        return 0.0

	    df = pd.DataFrame(rows)

	    num_correct = df['correct'].sum()
	    percentage = round(100.0 * num_correct / total, 1)
	    print(f"Answered {num_correct} / {total} ({percentage}%) correctly.")
	    df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌')

	    # Do not truncate long cells. This assigns pandas' global display option
	    # and does not restore the previous value afterwards.
	    pd.options.display.max_colwidth = None
	    left_aligned = [
	        {'selector': 'th', 'props': [('text-align', 'left')]},
	        {'selector': 'td', 'props': [('text-align', 'left')]},
	    ]
	    ipython_display(df.style.set_table_styles(left_aligned))

	    return percentage


	def evaluateAnswer(fn, dev, metric=EM):
	    """Score the ``answer`` attribute of ``fn``'s predictions over a dev set.

	    For every example, ``fn`` is called with ``example.question``; the
	    ``answer`` attribute of its result is stored in a ``prediction`` column
	    and scored with ``metric(prediction, example.answer)``. A one-line
	    summary is printed and a left-aligned table of all examples, with a
	    ✔️/❌ ``correct`` column, is displayed.

	    Args:
	        fn: Callable taking a question and returning an object that has an
	            ``answer`` attribute.
	        dev: Iterable of examples. Each example must expose ``question``
	            and ``answer`` attributes and be convertible via
	            ``dict(example)``.
	        metric: Callable ``metric(prediction, gold_answer)``. Its results
	            are summed for the summary line and tested for truthiness for
	            the ✔️/❌ column. Defaults to ``EM``.

	    Returns:
	        ``100 * sum(scores) / number_of_examples`` rounded to one decimal
	        place, or ``0.0`` when ``dev`` yields no examples.
	    """
	    rows = []

	    for example in tqdm.tqdm(dev):
	        prediction = fn(example.question)

	        row = dict(example)
	        answer = prediction.answer
	        row['prediction'] = answer
	        row['correct'] = metric(answer, example.answer)
	        rows.append(row)

	    # Count the rows actually evaluated instead of calling len(dev), so
	    # iterables without __len__ (e.g. generators) are accepted as well.
	    total = len(rows)
	    if total == 0:
	        # A DataFrame built from an empty list has no 'correct' column, so
	        # the lookups below would raise KeyError; report and stop instead.
	        print("Answered 0 / 0 (0.0%) correctly.")
	        return 0.0

	    df = pd.DataFrame(rows)

	    num_correct = df['correct'].sum()
	    percentage = round(100.0 * num_correct / total, 1)
	    print(f"Answered {num_correct} / {total} ({percentage}%) correctly.")
	    df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌')

	    # Do not truncate long cells. This assigns pandas' global display option
	    # and does not restore the previous value afterwards.
	    pd.options.display.max_colwidth = None
	    left_aligned = [
	        {'selector': 'th', 'props': [('text-align', 'left')]},
	        {'selector': 'td', 'props': [('text-align', 'left')]},
	    ]
	    ipython_display(df.style.set_table_styles(left_aligned))

	    return percentage



	def evaluate(fn, dev, metric=EM):
	    """Score ``fn``'s raw return values over a dev set and return the percentage.

	    For every example, ``fn`` is called with ``example.question``; whatever
	    it returns is stored as-is in a ``prediction`` column and scored with
	    ``metric(prediction, example.answer)``. A one-line summary is printed
	    and a left-aligned table of all examples, with a ✔️/❌ ``correct``
	    column, is displayed.

	    Args:
	        fn: Callable taking a question. Its return value is passed directly
	            to ``metric`` (no ``.answer`` attribute is read).
	        dev: Iterable of examples. Each example must expose ``question``
	            and ``answer`` attributes and be convertible via
	            ``dict(example)``.
	        metric: Callable ``metric(prediction, gold_answer)``. Its results
	            are summed for the summary line and tested for truthiness for
	            the ✔️/❌ column. Defaults to ``EM``.

	    Returns:
	        ``100 * sum(scores) / number_of_examples`` rounded to one decimal
	        place, or ``0.0`` when ``dev`` yields no examples.
	    """
	    rows = []

	    for example in tqdm.tqdm(dev):
	        prediction = fn(example.question)

	        row = dict(example)
	        row['prediction'] = prediction
	        row['correct'] = metric(prediction, example.answer)
	        rows.append(row)

	    # Count the rows actually evaluated instead of calling len(dev), so
	    # iterables without __len__ (e.g. generators) are accepted as well.
	    total = len(rows)
	    if total == 0:
	        # A DataFrame built from an empty list has no 'correct' column, so
	        # the lookups below would raise KeyError; report and stop instead.
	        print("Answered 0 / 0 (0.0%) correctly.")
	        return 0.0

	    df = pd.DataFrame(rows)

	    num_correct = df['correct'].sum()
	    percentage = round(100.0 * num_correct / total, 1)
	    print(f"Answered {num_correct} / {total} ({percentage}%) correctly.")
	    df['correct'] = df['correct'].apply(lambda x: '✔️' if x else '❌')

	    # Do not truncate long cells. This assigns pandas' global display option
	    # and does not restore the previous value afterwards.
	    pd.options.display.max_colwidth = None
	    left_aligned = [
	        {'selector': 'th', 'props': [('text-align', 'left')]},
	        {'selector': 'td', 'props': [('text-align', 'left')]},
	    ]
	    ipython_display(df.style.set_table_styles(left_aligned))

	    return percentage