Spaces:

PECCAVI-TEXT
/

peccavi

Sleeping

App Files Files Community

peccavi / utils /entailment.py

PECCAVI-TEXT

Upload 52 files

7e96e8d verified 4 months ago

raw

history blame contribute delete

4.19 kB

	import sys
	import os
	sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

	import numpy as np
	from transformers import pipeline
	from typing import List
	from utils.config import load_config


	class EntailmentAnalyzer:
	    """Score how strongly paraphrases are entailed by an original sentence.

	    Wraps a Hugging Face ``transformers`` pipeline whose task and model are
	    taken from the supplied configuration.
	    """

	    def __init__(self, config):
	        """
	        Initialize the EntailmentAnalyzer from an already-loaded config section.

	        Args:
	            config: Mapping that provides the ``'task'`` and ``'model'`` keys
	                passed to ``transformers.pipeline``.
	        """
	        self.config = config
	        self.entailment_pipeline = pipeline(task=self.config['task'], model=self.config['model'])

	    def check_entailment(self, premise: str, hypothesis: str) -> float:
	        """
	        Check entailment between the premise and hypothesis.

	        Args:
	            premise: The premise sentence.
	            hypothesis: The hypothesis sentence.

	        Returns:
	            float: The score the pipeline reports for the entailment label.

	        Raises:
	            ValueError: If the pipeline output contains no entailment label.
	        """
	        # NOTE(review): the pair is joined with a literal " [SEP] " string; whether
	        # the configured model's tokenizer treats that text as a separator token
	        # is not visible from this file -- confirm against the model in use.
	        results = self.entailment_pipeline(f"{premise} [SEP] {hypothesis}", top_k=None)

	        # Compare case-insensitively so a model that reports "ENTAILMENT" is
	        # handled the same way as one that reports "entailment".
	        for item in results:
	            if str(item['label']).lower() == 'entailment':
	                return item['score']

	        labels = [item['label'] for item in results]
	        raise ValueError(f"No 'entailment' label in pipeline output; got labels {labels}.")

	    def analyze_entailment(self, original_sentence: str, paraphrased_sentences: List[str], threshold: float) -> tuple:
	        """
	        Analyze entailment scores for paraphrased sentences. If no sentence reaches
	        the threshold, the threshold is lowered in steps of 0.1 until at least one
	        sentence is selected or the threshold reaches 0.

	        Args:
	            original_sentence: The original sentence (used as the premise).
	            paraphrased_sentences: List of paraphrased sentences (hypotheses).
	            threshold: Minimum score to select a sentence.

	        Returns:
	            tuple: ``(all_sentences, selected_sentences, discarded_sentences)``,
	            each a dict mapping a sentence to its entailment score. Selected and
	            discarded are disjoint and together cover ``all_sentences``.
	        """
	        # Nothing to score: return immediately instead of stepping the threshold
	        # down to zero for no reason.
	        if not paraphrased_sentences:
	            return {}, {}, {}

	        # Scores do not depend on the threshold, so run the model exactly once
	        # per sentence and only redo the cheap partitioning when the threshold
	        # is lowered.
	        all_sentences = {
	            sentence: self.check_entailment(original_sentence, sentence)
	            for sentence in paraphrased_sentences
	        }

	        selected_sentences = {}
	        while True:
	            selected_sentences = {
	                sentence: score
	                for sentence, score in all_sentences.items()
	                if score >= threshold
	            }
	            if selected_sentences:
	                break

	            print(f"No selected sentences found. Lowering the threshold by 0.1 (from {threshold} to {threshold - 0.1}).")
	            threshold -= 0.1
	            if threshold <= 0:
	                print("Threshold has reached 0. No sentences meet the criteria.")
	                break

	        # Derive the discarded set from the final selection so that a sentence
	        # picked up after a threshold reduction is not also reported as discarded.
	        discarded_sentences = {
	            sentence: score
	            for sentence, score in all_sentences.items()
	            if sentence not in selected_sentences
	        }

	        return all_sentences, selected_sentences, discarded_sentences


	if __name__ == "__main__":
	    # Demo entry point: score a handful of paraphrases against one sentence.
	    #
	    # Config resolution order:
	    #   1. a path given as the first command-line argument;
	    #   2. <this dir>/../config/config.yaml;
	    #   3. <this dir>/config/config.yaml (layout suggested by the legacy
	    #      absolute path below -- presumably where the file lives; verify);
	    #   4. the legacy machine-specific path, kept only as a last resort so the
	    #      script still behaves as before on the original developer machine.
	    module_dir = os.path.dirname(os.path.abspath(__file__))
	    candidate_paths = [
	        os.path.join(module_dir, '..', 'config', 'config.yaml'),
	        os.path.join(module_dir, 'config', 'config.yaml'),
	        '/home/ashhar21137/text_wm/scratch/utils/config/config.yaml',
	    ]
	    if len(sys.argv) > 1:
	        config_path = sys.argv[1]
	    else:
	        # Fall back to the last candidate when none exists so load_config
	        # reports the missing file the same way it did before.
	        config_path = next(
	            (path for path in candidate_paths if os.path.isfile(path)),
	            candidate_paths[-1],
	        )

	    config = load_config(config_path)

	    entailment_analyzer = EntailmentAnalyzer(config['PECCAVI_TEXT']['Entailment'])

	    all_sentences, selected_sentences, discarded_sentences = entailment_analyzer.analyze_entailment(
	        "The weather is nice today",
	        [
	            "The climate is pleasant today",
	            "It's a good day weather-wise",
	            "Today, the weather is terrible",
	            "What a beautiful day it is",
	            "The sky is clear and the weather is perfect",
	            "It's pouring rain outside today",
	            "The weather isn't bad today",
	            "A lovely day for outdoor activities"
	        ],
	        0.7
	    )

	    print("----------------------- All Sentences -----------------------")
	    print(all_sentences)
	    print("----------------------- Discarded Sentences -----------------------")
	    print(discarded_sentences)
	    print("----------------------- Selected Sentences -----------------------")
	    print(selected_sentences)