biswanathroul
/

efficient-context

efficient-context

context-optimization

cpu-optimization

resource-constrained

memory-management

Model card Files Files and versions Community

efficient-context / examples /dedup_test.py

biswanath2.roul

Initial commit

e4d5155 10 days ago

history blame contribute delete

1.79 kB

	"""
	Manual benchmark for the SemanticDeduplicator component.
	"""

	import sys
	import logging
	from efficient_context.compression import SemanticDeduplicator

	# Set up logging: configure the root logger so INFO-level (and higher)
	# messages are emitted.
	logging.basicConfig(level=logging.INFO)
	# Module-scoped logger named after this module.
	logger = logging.getLogger(__name__)

	def main():
	    """Run SemanticDeduplicator on a fixed sample and print size statistics.

	    The sample is two paragraphs of closely related sentences. For each
	    threshold value a fresh deduplicator is constructed, the sample is
	    compressed, and the resulting word count, its ratio to the original
	    word count, and the first 100 characters of the output are printed.
	    """
	    print("Testing SemanticDeduplicator")

	    # Sample input: sentences that restate the same ideas in different words.
	    sample = """
	Climate change is a significant global challenge.
	Global warming is affecting ecosystems worldwide.
	The Earth's temperature is rising due to human activities.
	Climate change poses a serious threat to our planet.
	Rising global temperatures are causing environmental problems.

	Renewable energy is key to a sustainable future.
	Clean energy sources help reduce carbon emissions.
	Sustainable power generation is vital for fighting climate change.
	Green energy technologies are becoming more affordable.
	Renewable resources provide alternatives to fossil fuels.
	"""

	    # The input never changes, so its word count is computed a single time.
	    source_words = len(sample.split())
	    print(f"Original text length: {source_words} words")

	    # Sweep the thresholds from the lowest to the highest value.
	    for cutoff in (0.7, 0.8, 0.85, 0.9, 0.95):
	        print(f"\nTesting threshold: {cutoff}")

	        compressor = SemanticDeduplicator(threshold=cutoff)
	        result = compressor.compress(sample)

	        result_words = len(result.split())
	        print(f"Compressed text length: {result_words} words")
	        print(f"Compression ratio: {result_words / source_words:.2f}")

	        # Show only the leading 100 characters of the output.
	        preview = result[:100]
	        print(f"Compressed text (preview): {preview}...")

	# Run the benchmark only when this file is executed directly as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	main()