biswanath2.roul committed
Commit e4d5155
Initial commit

Files changed:
- .gitignore +44 -0
- CONTRIBUTING.md +72 -0
- LICENSE +21 -0
- MANIFEST.in +6 -0
- PUBLISHING.md +81 -0
- README.md +73 -0
- debug.py +19 -0
- docs/usage.md +222 -0
- efficient_context/__init__.py +9 -0
- efficient_context/chunking/__init__.py +8 -0
- efficient_context/chunking/base.py +54 -0
- efficient_context/chunking/semantic_chunker.py +295 -0
- efficient_context/compression/__init__.py +8 -0
- efficient_context/compression/base.py +23 -0
- efficient_context/compression/semantic_deduplicator.py +261 -0
- efficient_context/context_manager.py +169 -0
- efficient_context/memory/__init__.py +7 -0
- efficient_context/memory/memory_manager.py +134 -0
- efficient_context/retrieval/__init__.py +8 -0
- efficient_context/retrieval/base.py +40 -0
- efficient_context/retrieval/cpu_optimized_retriever.py +247 -0
- efficient_context/utils/__init__.py +12 -0
- efficient_context/utils/text.py +120 -0
- examples/basic_usage.py +92 -0
- examples/benchmark.py +209 -0
- examples/dedup_benchmark.py +214 -0
- examples/dedup_eval.py +114 -0
- examples/dedup_test.py +49 -0
- examples/deduplication_benchmark.py +277 -0
- examples/demo_notebook.ipynb +0 -0
- examples/llm_integration.py +164 -0
- examples/simple_dedup_benchmark.py +92 -0
- examples/simple_test.py +69 -0
- model_card.md +91 -0
- pyproject.toml +14 -0
- requirements.txt +7 -0
- setup.py +31 -0
- test_simple.py +75 -0
- tests/test_core.py +114 -0
.gitignore
ADDED
@@ -0,0 +1,44 @@
+# Python bytecode
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+dist/
+build/
+*.egg-info/
+
+# Virtual environments
+venv/
+env/
+ENV/
+
+# Testing
+.coverage
+htmlcov/
+.pytest_cache/
+
+# Environment variables
+.env
+
+# IDE specific files
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# OS specific files
+.DS_Store
+Thumbs.db
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# Build logs
+*.log
+
+# Hugging Face specific
+.huggingface/
+*.safetensors
+wandb/
+outputs/
CONTRIBUTING.md
ADDED
@@ -0,0 +1,72 @@
+# Contributing to efficient-context
+
+Thank you for considering contributing to efficient-context! This document provides guidelines and instructions for contributing.
+
+## Code of Conduct
+
+By participating in this project, you agree to abide by our [Code of Conduct](CODE_OF_CONDUCT.md).
+
+## How Can I Contribute?
+
+### Reporting Bugs
+
+Bug reports help make efficient-context better for everyone. When reporting a bug, please include:
+
+1. A clear title and description
+2. Steps to reproduce the issue
+3. Expected behavior
+4. Actual behavior
+5. Environment details (OS, Python version, etc.)
+
+### Suggesting Enhancements
+
+We welcome suggestions for improvements! Please include:
+
+1. A clear description of the enhancement
+2. The rationale/use case
+3. Possible implementation approaches (if any)
+
+### Pull Requests
+
+1. Fork the repository
+2. Create a new branch for your feature or bug fix
+3. Make your changes with appropriate tests
+4. Ensure all tests pass
+5. Submit a pull request
+
+## Development Setup
+
+1. Clone the repository
+2. Create a virtual environment: `python -m venv venv`
+3. Activate the environment: `source venv/bin/activate` (Unix) or `venv\Scripts\activate` (Windows)
+4. Install development dependencies: `pip install -e ".[dev]"`
+
+## Testing
+
+Run tests with pytest:
+
+```bash
+pytest
+```
+
+## Style Guide
+
+This project follows PEP 8 with a line length of 88 characters (compatible with black).
+
+To format code:
+
+```bash
+black .
+isort .
+```
+
+## Documentation
+
+- Update documentation for any new features or changes
+- Add docstrings for classes and functions
+
+## Contact
+
+For questions, feel free to open an issue or contact [Biswanath Roul](https://github.com/biswanathroul).
+
+Thank you for contributing to efficient-context!
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Biswanath Roul
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
MANIFEST.in
ADDED
@@ -0,0 +1,6 @@
+include LICENSE
+include README.md
+include pyproject.toml
+recursive-include tests *
+recursive-exclude tests *.pyc
+recursive-exclude tests __pycache__
PUBLISHING.md
ADDED
@@ -0,0 +1,81 @@
+# Publishing to PyPI
+
+This guide explains how to build and publish the `efficient-context` package to PyPI.
+
+## Prerequisites
+
+1. Create an account on PyPI: https://pypi.org/account/register/
+2. Install build and twine packages:
+
+```bash
+pip install build twine
+```
+
+## Build the Package
+
+1. Navigate to the project directory:
+
+```bash
+cd /path/to/efficient-context
+```
+
+2. Build the distribution packages:
+
+```bash
+python -m build
+```
+
+This will create a directory called `dist` containing both `.tar.gz` (source distribution) and `.whl` (built distribution) files.
+
+## Upload to TestPyPI (Recommended)
+
+Before publishing to the main PyPI repository, it's a good practice to test on TestPyPI:
+
+```bash
+python -m twine upload --repository-url https://test.pypi.org/legacy/ dist/*
+```
+
+You'll be prompted for your TestPyPI username and password.
+
+Then install from TestPyPI to verify it works:
+
+```bash
+pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple efficient-context
+```
+
+## Upload to PyPI
+
+Once you've verified everything works correctly, upload to the actual PyPI:
+
+```bash
+python -m twine upload dist/*
+```
+
+You'll be prompted for your PyPI username and password.
+
+## Verify Installation
+
+After uploading, verify that your package can be installed from PyPI:
+
+```bash
+pip install efficient-context
+```
+
+## Updating the Package
+
+To update the package:
+
+1. Update the version number in `setup.py`
+2. Rebuild the package: `python -m build`
+3. Upload to PyPI again: `python -m twine upload dist/*`
+
+## GitHub Integration
+
+If your code is hosted on GitHub, you may want to set up GitHub Actions to automatically build and publish your package when you create a new release. The code for this project is available at: https://github.com/biswanathroul/efficient-context
+
+## Tips
+
+- Always increment the version number in `setup.py` before publishing a new version
+- Keep your PyPI credentials secure
+- Include comprehensive documentation and examples in your package
+- Add proper classifiers in `setup.py` for better searchability
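The update loop above (bump the version, rebuild, re-upload) is easy to script. Below is a minimal sketch of such a helper; the `release.py` name, the `version="X.Y.Z"` pattern assumed to exist in `setup.py`, and the patch-bump policy are illustrative assumptions, not part of this commit.

```python
#!/usr/bin/env python
"""Hypothetical release helper: bump the patch version, rebuild, upload."""
import glob
import re
import subprocess
import sys

def bump_patch_version(path: str = "setup.py") -> str:
    """Increment the Z in version="X.Y.Z" (assumes that exact pattern exists)."""
    with open(path) as f:
        text = f.read()
    match = re.search(r'version="(\d+)\.(\d+)\.(\d+)"', text)
    if match is None:
        sys.exit('no version="X.Y.Z" string found to bump')
    major, minor, patch = map(int, match.groups())
    new_version = f"{major}.{minor}.{patch + 1}"
    with open(path, "w") as f:
        f.write(text.replace(match.group(0), f'version="{new_version}"'))
    return new_version

if __name__ == "__main__":
    print("Releasing version", bump_patch_version())
    # The same two commands the guide runs by hand.
    subprocess.run([sys.executable, "-m", "build"], check=True)
    subprocess.run([sys.executable, "-m", "twine", "upload", *glob.glob("dist/*")], check=True)
```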
README.md
ADDED
@@ -0,0 +1,73 @@
+# efficient-context
+
+A Python library for optimizing LLM context handling in CPU-constrained environments.
+
+## Overview
+
+`efficient-context` addresses the challenge of working with large language models (LLMs) on CPU-only and memory-limited systems by providing efficient context management strategies. The library focuses on:
+
+- **Context Compression**: Reduce memory requirements while preserving information quality
+- **Semantic Chunking**: Go beyond token-based approaches for more effective context management
+- **Retrieval Optimization**: Minimize context size through intelligent retrieval strategies
+- **Memory Management**: Handle large contexts on limited hardware resources
+
+## Installation
+
+```bash
+pip install efficient-context
+```
+
+## Quick Start
+
+```python
+from efficient_context import ContextManager
+from efficient_context.compression import SemanticDeduplicator
+from efficient_context.chunking import SemanticChunker
+from efficient_context.retrieval import CPUOptimizedRetriever
+
+# Initialize a context manager with custom strategies
+context_manager = ContextManager(
+    compressor=SemanticDeduplicator(threshold=0.85),
+    chunker=SemanticChunker(chunk_size=256),
+    retriever=CPUOptimizedRetriever(embedding_model="lightweight")
+)
+
+# Add documents to your context
+context_manager.add_documents(documents)
+
+# Generate optimized context for a query
+optimized_context = context_manager.generate_context(query="Tell me about the climate impact of renewable energy")
+
+# Use the optimized context with your LLM
+response = your_llm_model.generate(prompt=prompt, context=optimized_context)
+```
+
+## Features
+
+### Context Compression
+- Semantic deduplication to remove redundant information
+- Importance-based pruning that keeps critical information
+- Automatic summarization of less relevant sections
+
+### Advanced Chunking
+- Semantic chunking that preserves logical units
+- Adaptive chunk sizing based on content complexity
+- Chunk relationships mapping for coherent retrieval
+
+### Retrieval Optimization
+- Lightweight embedding models optimized for CPU
+- Tiered retrieval strategies (local vs. remote)
+- Query-aware context assembly
+
+### Memory Management
+- Progressive loading/unloading of context
+- Streaming context processing
+- Memory-aware caching strategies
+
+## Maintainer
+
+This project is maintained by [Biswanath Roul](https://github.com/biswanathroul)
+
+## License
+
+MIT
debug.py
ADDED
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+"""
+Debug script for efficient-context.
+"""
+
+import sys
+import os
+
+print(f"Python version: {sys.version}")
+print(f"Current working directory: {os.getcwd()}")
+print(f"Python path: {sys.path}")
+
+try:
+    import efficient_context
+    print(f"Successfully imported efficient_context: {efficient_context.__file__}")
+except ImportError as e:
+    print(f"Failed to import efficient_context: {e}")
+
+print("Script completed")
docs/usage.md
ADDED
@@ -0,0 +1,222 @@
+# efficient-context Documentation
+
+## Overview
+
+`efficient-context` is a Python library designed to optimize the handling of context for Large Language Models (LLMs) in CPU-constrained environments. It addresses the challenges of using LLMs with limited computational resources by providing efficient context management strategies.
+
+## Key Features
+
+1. **Context Compression**: Reduce memory requirements while preserving information quality
+2. **Semantic Chunking**: Go beyond token-based approaches for more effective context management
+3. **Retrieval Optimization**: Minimize context size through intelligent retrieval strategies
+4. **Memory Management**: Handle large contexts on limited hardware resources
+
+## Installation
+
+```bash
+pip install efficient-context
+```
+
+## Core Components
+
+### ContextManager
+
+The central class that orchestrates all components of the library.
+
+```python
+from efficient_context import ContextManager
+
+# Initialize with default settings
+context_manager = ContextManager()
+
+# Add documents
+context_manager.add_document("This is a sample document about renewable energy...")
+context_manager.add_documents([doc1, doc2, doc3])  # Add multiple documents
+
+# Generate context for a query
+optimized_context = context_manager.generate_context(query="Tell me about renewable energy")
+```
+
+### Context Compression
+
+The compression module reduces the size of content while preserving key information.
+
+```python
+from efficient_context.compression import SemanticDeduplicator
+
+# Initialize with custom settings
+compressor = SemanticDeduplicator(
+    threshold=0.85,                 # Similarity threshold for deduplication
+    embedding_model="lightweight",  # Use a lightweight embedding model
+    min_sentence_length=10,         # Minimum length of sentences to consider
+    importance_weight=0.3           # Weight given to sentence importance vs. deduplication
+)
+
+# Compress content
+compressed_content = compressor.compress(
+    content="Your large text content here...",
+    target_size=1000  # Optional target size in tokens
+)
+```
+
+### Semantic Chunking
+
+The chunking module divides content into semantically coherent chunks.
+
+```python
+from efficient_context.chunking import SemanticChunker
+
+# Initialize with custom settings
+chunker = SemanticChunker(
+    chunk_size=512,           # Target size for chunks in tokens
+    chunk_overlap=50,         # Number of tokens to overlap between chunks
+    respect_paragraphs=True,  # Avoid breaking paragraphs across chunks
+    min_chunk_size=100,       # Minimum chunk size in tokens
+    max_chunk_size=1024       # Maximum chunk size in tokens
+)
+
+# Chunk content
+chunks = chunker.chunk(
+    content="Your large text content here...",
+    document_id="doc-1",  # Optional document ID
+    metadata={"source": "example", "author": "John Doe"}  # Optional metadata
+)
+```
+
+### Retrieval Optimization
+
+The retrieval module finds the most relevant chunks for a query.
+
+```python
+from efficient_context.retrieval import CPUOptimizedRetriever
+
+# Initialize with custom settings
+retriever = CPUOptimizedRetriever(
+    embedding_model="lightweight",  # Use a lightweight embedding model
+    similarity_metric="cosine",     # Metric for comparing embeddings
+    use_batching=True,              # Batch embedding operations
+    batch_size=32,                  # Size of batches for embedding
+    max_index_size=5000             # Maximum number of chunks to keep in the index
+)
+
+# Index chunks
+retriever.index_chunks(chunks)
+
+# Retrieve relevant chunks
+relevant_chunks = retriever.retrieve(
+    query="Your query here...",
+    top_k=5  # Number of chunks to retrieve
+)
+```
+
+### Memory Management
+
+The memory module helps optimize memory usage during operations.
+
+```python
+from efficient_context.memory import MemoryManager
+
+# Initialize with custom settings
+memory_manager = MemoryManager(
+    target_usage_percent=80.0,    # Target memory usage percentage
+    aggressive_cleanup=False,     # Whether to perform aggressive garbage collection
+    memory_monitor_interval=None  # Interval for memory monitoring in seconds
+)
+
+# Use context manager for memory-intensive operations
+with memory_manager.optimize_memory():
+    # Run memory-intensive operations here
+    results = process_large_documents(documents)
+
+# Get memory usage statistics
+memory_stats = memory_manager.get_memory_usage()
+print(f"Process memory: {memory_stats['process_rss_bytes'] / (1024*1024):.2f} MB")
+```
+
+## Advanced Usage
+
+### Customizing the Context Manager
+
+```python
+from efficient_context import ContextManager
+from efficient_context.compression import SemanticDeduplicator
+from efficient_context.chunking import SemanticChunker
+from efficient_context.retrieval import CPUOptimizedRetriever
+from efficient_context.memory import MemoryManager

+# Initialize a fully customized context manager
+context_manager = ContextManager(
+    compressor=SemanticDeduplicator(threshold=0.85),
+    chunker=SemanticChunker(chunk_size=256, chunk_overlap=50),
+    retriever=CPUOptimizedRetriever(embedding_model="lightweight"),
+    memory_manager=MemoryManager(target_usage_percent=80.0),
+    max_context_size=4096
+)
+```
+
+### Integration with LLMs
+
+```python
+from efficient_context import ContextManager
+from your_llm_library import LLM  # Replace with your actual LLM library
+
+# Initialize components
+context_manager = ContextManager()
+llm = LLM(model="lightweight-model")
+
+# Process documents
+context_manager.add_documents(documents)
+
+# For each query
+query = "Tell me about renewable energy"
+optimized_context = context_manager.generate_context(query=query)
+
+# Use context with the LLM
+response = llm.generate(
+    prompt=query,
+    context=optimized_context,
+    max_tokens=512
+)
+```
+
+## Performance Considerations
+
+- **Memory Usage**: The library is designed to be memory-efficient, but be aware that embedding models may still require significant memory.
+- **CPU Performance**: Choose the appropriate embedding model based on your CPU capabilities. The `lightweight` option is recommended for constrained environments.
+- **Batch Size**: Adjust the `batch_size` parameter in retrieval to balance between memory usage and processing speed.
+- **Context Size**: Setting an appropriate `max_context_size` can significantly impact performance, especially when working with limited resources.
+
+## Extending the Library
+
+You can create custom implementations of the base classes to adapt the library to your specific needs:
+
+```python
+from efficient_context.compression.base import BaseCompressor
+
+class MyCustomCompressor(BaseCompressor):
+    def __init__(self, custom_param=None):
+        self.custom_param = custom_param
+
+    def compress(self, content, target_size=None):
+        # Your custom compression logic here
+        return compressed_content
+```
+
+## Troubleshooting
+
+**High Memory Usage**
+- Reduce `batch_size` in the retriever
+- Use a more lightweight embedding model
+- Decrease `max_index_size` to limit the number of chunks stored in memory
+
+**Slow Processing**
+- Increase `batch_size` (balancing with memory constraints)
+- Decrease `threshold` in the SemanticDeduplicator to deduplicate more aggressively (sentences with similarity above the threshold are dropped, so a lower threshold removes more)
+- Reduce `chunk_overlap` to minimize redundant processing
+
+## Example Applications
+
+- **Chatbots on Edge Devices**: Enable context-aware conversations on devices with limited resources
+- **Document QA Systems**: Create efficient question-answering systems for large document collections
+- **Embedded AI Applications**: Incorporate context-aware LLM capabilities in embedded systems
+- **Mobile Applications**: Provide sophisticated LLM features in mobile apps with limited resources
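To make the tuning advice above concrete, here is a minimal sketch of a configuration biased toward low memory use. It only combines parameters already documented on this page; the specific values are illustrative starting points, not recommendations shipped with the library.

```python
from efficient_context import ContextManager
from efficient_context.compression import SemanticDeduplicator
from efficient_context.chunking import SemanticChunker
from efficient_context.retrieval import CPUOptimizedRetriever
from efficient_context.memory import MemoryManager

# Bias every knob toward lower memory: smaller batches, a smaller index,
# less chunk overlap, and more aggressive deduplication.
low_memory_manager = ContextManager(
    compressor=SemanticDeduplicator(threshold=0.80),  # lower threshold drops more near-duplicates
    chunker=SemanticChunker(chunk_size=256, chunk_overlap=20),
    retriever=CPUOptimizedRetriever(
        embedding_model="lightweight",
        batch_size=8,         # smaller embedding batches
        max_index_size=1000,  # cap the number of chunks held in memory
    ),
    memory_manager=MemoryManager(target_usage_percent=70.0, aggressive_cleanup=True),
    max_context_size=2048,
)
```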
efficient_context/__init__.py
ADDED
@@ -0,0 +1,9 @@
+"""
+efficient-context: A Python library for optimizing LLM context handling in CPU-constrained environments.
+"""
+
+__version__ = "0.1.0"
+
+from efficient_context.context_manager import ContextManager
+
+__all__ = ["ContextManager"]
efficient_context/chunking/__init__.py
ADDED
@@ -0,0 +1,8 @@
+"""
+Chunking components for efficient-context.
+"""
+
+from efficient_context.chunking.base import BaseChunker, Chunk
+from efficient_context.chunking.semantic_chunker import SemanticChunker
+
+__all__ = ["BaseChunker", "Chunk", "SemanticChunker"]
efficient_context/chunking/base.py
ADDED
@@ -0,0 +1,54 @@
+"""
+Base classes for context chunking components.
+"""
+
+from abc import ABC, abstractmethod
+from typing import List, Dict, Any, Optional
+
+class Chunk:
+    """Representation of a text chunk with metadata."""
+
+    def __init__(
+        self,
+        content: str,
+        chunk_id: str,
+        document_id: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ):
+        """
+        Initialize a chunk.
+
+        Args:
+            content: The text content of the chunk
+            chunk_id: Unique identifier for the chunk
+            document_id: Optional ID of the source document
+            metadata: Optional metadata for the chunk
+        """
+        self.content = content
+        self.chunk_id = chunk_id
+        self.document_id = document_id
+        self.metadata = metadata or {}
+        self.embedding = None
+
+class BaseChunker(ABC):
+    """Base class for content chunking components."""
+
+    @abstractmethod
+    def chunk(
+        self,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+        document_id: Optional[str] = None
+    ) -> List[Chunk]:
+        """
+        Split content into chunks.
+
+        Args:
+            content: Content to be chunked
+            metadata: Optional metadata to associate with chunks
+            document_id: Optional document ID to associate with chunks
+
+        Returns:
+            chunks: List of Chunk objects
+        """
+        pass
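The abstract `chunk` method above is the entire contract a chunker has to satisfy. As an illustration that is not part of this commit, a minimal custom chunker might split on a fixed word budget:

```python
import uuid
from typing import Any, Dict, List, Optional

from efficient_context.chunking.base import BaseChunker, Chunk

class FixedSizeChunker(BaseChunker):
    """Illustrative chunker: split on a fixed word budget, ignoring semantics."""

    def __init__(self, words_per_chunk: int = 200):
        self.words_per_chunk = words_per_chunk

    def chunk(
        self,
        content: str,
        metadata: Optional[Dict[str, Any]] = None,
        document_id: Optional[str] = None,
    ) -> List[Chunk]:
        words = content.split()
        # One Chunk per window of words_per_chunk tokens.
        return [
            Chunk(
                content=" ".join(words[i:i + self.words_per_chunk]),
                chunk_id=str(uuid.uuid4()),
                document_id=document_id,
                metadata=metadata,
            )
            for i in range(0, len(words), self.words_per_chunk)
        ]
```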
efficient_context/chunking/semantic_chunker.py
ADDED
@@ -0,0 +1,295 @@
+"""
+Semantic chunking for intelligent context segmentation.
+"""
+
+import logging
+import uuid
+from typing import List, Dict, Any, Optional, Tuple
+
+from efficient_context.chunking.base import BaseChunker, Chunk
+from efficient_context.utils.text import split_into_sentences, calculate_text_overlap
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class SemanticChunker(BaseChunker):
+    """
+    Chunker that creates chunks based on semantic boundaries.
+
+    This chunker aims to keep semantically related content together, unlike
+    simple token-based chunking that might split content mid-thought.
+    """
+
+    def __init__(
+        self,
+        chunk_size: int = 512,
+        chunk_overlap: int = 50,
+        respect_paragraphs: bool = True,
+        min_chunk_size: int = 100,
+        max_chunk_size: int = 1024
+    ):
+        """
+        Initialize the SemanticChunker.
+
+        Args:
+            chunk_size: Target size for chunks in tokens (words)
+            chunk_overlap: Number of tokens to overlap between chunks
+            respect_paragraphs: Whether to avoid breaking paragraphs across chunks
+            min_chunk_size: Minimum chunk size in tokens
+            max_chunk_size: Maximum chunk size in tokens
+        """
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+        self.respect_paragraphs = respect_paragraphs
+        self.min_chunk_size = min_chunk_size
+        self.max_chunk_size = max_chunk_size
+
+        logger.info(
+            "SemanticChunker initialized with target size: %d tokens, overlap: %d tokens",
+            chunk_size, chunk_overlap
+        )
+
+    def _estimate_tokens(self, text: str) -> int:
+        """
+        Estimate the number of tokens in text.
+
+        Args:
+            text: Text to estimate tokens for
+
+        Returns:
+            token_count: Estimated number of tokens
+        """
+        # Simple whitespace-based token estimation
+        # This is much faster than using a tokenizer and good enough for chunking
+        return len(text.split())
+
+    def _identify_paragraphs(self, content: str) -> List[str]:
+        """
+        Split content into paragraphs.
+
+        Args:
+            content: Content to split
+
+        Returns:
+            paragraphs: List of paragraphs
+        """
+        # Split on empty lines (common paragraph separator)
+        paragraphs = [p.strip() for p in content.split("\n\n")]
+
+        # Handle other kinds of paragraph breaks and clean up
+        result = []
+        current = ""
+
+        for p in paragraphs:
+            # Skip empty paragraphs
+            if not p:
+                continue
+
+            # Handle single newlines that might indicate paragraphs
+            lines = p.split("\n")
+            for line in lines:
+                if not line.strip():
+                    if current:
+                        result.append(current)
+                        current = ""
+                else:
+                    if current:
+                        current += " " + line.strip()
+                    else:
+                        current = line.strip()
+
+            if current:
+                result.append(current)
+                current = ""
+
+        # Add any remaining content
+        if current:
+            result.append(current)
+
+        return result if result else [content]
+
+    def _create_semantic_chunks(
+        self,
+        paragraphs: List[str],
+        document_id: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None
+    ) -> List[Chunk]:
+        """
+        Create chunks from paragraphs respecting semantic boundaries.
+
+        Args:
+            paragraphs: List of paragraphs to chunk
+            document_id: Optional ID of the source document
+            metadata: Optional metadata for the chunks
+
+        Returns:
+            chunks: List of Chunk objects
+        """
+        chunks = []
+        current_chunk_text = ""
+        current_token_count = 0
+
+        for paragraph in paragraphs:
+            paragraph_tokens = self._estimate_tokens(paragraph)
+
+            # Check if adding this paragraph would exceed the max chunk size
+            if (current_token_count + paragraph_tokens > self.max_chunk_size and
+                    current_token_count >= self.min_chunk_size):
+                # Create a new chunk with the current content
+                chunk_id = str(uuid.uuid4())
+                chunk = Chunk(
+                    content=current_chunk_text.strip(),
+                    chunk_id=chunk_id,
+                    document_id=document_id,
+                    metadata=metadata
+                )
+                chunks.append(chunk)
+
+                # Start a new chunk with overlap
+                if self.chunk_overlap > 0 and current_chunk_text:
+                    # Get the last N tokens for overlap
+                    words = current_chunk_text.split()
+                    overlap_text = " ".join(words[-min(self.chunk_overlap, len(words)):])
+                    current_chunk_text = overlap_text + " " + paragraph
+                    current_token_count = self._estimate_tokens(current_chunk_text)
+                else:
+                    # No overlap
+                    current_chunk_text = paragraph
+                    current_token_count = paragraph_tokens
+            # Handle very large paragraphs that exceed max_chunk_size on their own
+            elif paragraph_tokens > self.max_chunk_size:
+                # If we have existing content, create a chunk first
+                if current_chunk_text:
+                    chunk_id = str(uuid.uuid4())
+                    chunk = Chunk(
+                        content=current_chunk_text.strip(),
+                        chunk_id=chunk_id,
+                        document_id=document_id,
+                        metadata=metadata
+                    )
+                    chunks.append(chunk)
+                    current_chunk_text = ""
+                    current_token_count = 0
+
+                # Split the large paragraph into sentences
+                sentences = split_into_sentences(paragraph)
+                sentence_chunk = ""
+                sentence_token_count = 0
+
+                for sentence in sentences:
+                    sentence_tokens = self._estimate_tokens(sentence)
+
+                    # Check if adding this sentence would exceed the max chunk size
+                    if (sentence_token_count + sentence_tokens > self.max_chunk_size and
+                            sentence_token_count >= self.min_chunk_size):
+                        # Create a new chunk with the current sentences
+                        chunk_id = str(uuid.uuid4())
+                        chunk = Chunk(
+                            content=sentence_chunk.strip(),
+                            chunk_id=chunk_id,
+                            document_id=document_id,
+                            metadata=metadata
+                        )
+                        chunks.append(chunk)
+
+                        # Start a new chunk with overlap
+                        if self.chunk_overlap > 0 and sentence_chunk:
+                            words = sentence_chunk.split()
+                            overlap_text = " ".join(words[-min(self.chunk_overlap, len(words)):])
+                            sentence_chunk = overlap_text + " " + sentence
+                            sentence_token_count = self._estimate_tokens(sentence_chunk)
+                        else:
+                            sentence_chunk = sentence
+                            sentence_token_count = sentence_tokens
+                    else:
+                        # Add the sentence to the current chunk
+                        if sentence_chunk:
+                            sentence_chunk += " " + sentence
+                        else:
+                            sentence_chunk = sentence
+                        sentence_token_count += sentence_tokens
+
+                # Add any remaining sentence content as a chunk
+                if sentence_chunk:
+                    chunk_id = str(uuid.uuid4())
+                    chunk = Chunk(
+                        content=sentence_chunk.strip(),
+                        chunk_id=chunk_id,
+                        document_id=document_id,
+                        metadata=metadata
+                    )
+                    chunks.append(chunk)
+            else:
+                # Add the paragraph to the current chunk
+                if current_chunk_text:
+                    current_chunk_text += " " + paragraph
+                else:
+                    current_chunk_text = paragraph
+                current_token_count += paragraph_tokens
+
+                # Check if we've reached the target chunk size
+                if current_token_count >= self.chunk_size:
+                    chunk_id = str(uuid.uuid4())
+                    chunk = Chunk(
+                        content=current_chunk_text.strip(),
+                        chunk_id=chunk_id,
+                        document_id=document_id,
+                        metadata=metadata
+                    )
+                    chunks.append(chunk)
+
+                    # Start a new chunk with overlap
+                    if self.chunk_overlap > 0:
+                        words = current_chunk_text.split()
+                        current_chunk_text = " ".join(words[-min(self.chunk_overlap, len(words)):])
+                        current_token_count = self._estimate_tokens(current_chunk_text)
+                    else:
+                        current_chunk_text = ""
+                        current_token_count = 0
+
+        # Add any remaining content as a final chunk
+        if current_chunk_text and current_token_count >= self.min_chunk_size:
+            chunk_id = str(uuid.uuid4())
+            chunk = Chunk(
+                content=current_chunk_text.strip(),
+                chunk_id=chunk_id,
+                document_id=document_id,
+                metadata=metadata
+            )
+            chunks.append(chunk)
+
+        return chunks
+
+    def chunk(
+        self,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+        document_id: Optional[str] = None
+    ) -> List[Chunk]:
+        """
+        Split content into semantic chunks.
+
+        Args:
+            content: Content to be chunked
+            metadata: Optional metadata to associate with chunks
+            document_id: Optional document ID to associate with chunks
+
+        Returns:
+            chunks: List of Chunk objects
+        """
+        if not content.strip():
+            return []
+
+        # Identify paragraphs
+        if self.respect_paragraphs:
+            paragraphs = self._identify_paragraphs(content)
+        else:
+            # Treat the whole content as one paragraph
+            paragraphs = [content]
+
+        # Create chunks from paragraphs
+        chunks = self._create_semantic_chunks(paragraphs, document_id, metadata)
+
+        logger.info("Created %d chunks from content", len(chunks))
+        return chunks
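A short usage sketch of the chunker above (assuming the package and its `efficient_context.utils.text` helpers are importable). With `chunk_overlap=0` and small budgets, whole paragraphs are appended until the target size is reached; the sample text and sizes are illustrative only.

```python
from efficient_context.chunking import SemanticChunker

# Small budgets so the behavior is visible on a short example.
chunker = SemanticChunker(chunk_size=40, chunk_overlap=0,
                          min_chunk_size=5, max_chunk_size=80)

text = (
    "Solar power converts sunlight into electricity using photovoltaic cells. "
    "It has become one of the fastest-growing energy sources in the world.\n\n"
    "Wind power captures kinetic energy from moving air with large turbines. "
    "Offshore wind farms can generate power at a very large scale."
)

for c in chunker.chunk(text, document_id="demo-doc"):
    # Whole paragraphs are accumulated until the 40-token target is reached.
    print(c.chunk_id[:8], "-", len(c.content.split()), "tokens")
```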
efficient_context/compression/__init__.py
ADDED
@@ -0,0 +1,8 @@
+"""
+Compression components for efficient-context.
+"""
+
+from efficient_context.compression.base import BaseCompressor
+from efficient_context.compression.semantic_deduplicator import SemanticDeduplicator
+
+__all__ = ["BaseCompressor", "SemanticDeduplicator"]
efficient_context/compression/base.py
ADDED
@@ -0,0 +1,23 @@
+"""
+Base classes for context compression components.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional
+
+class BaseCompressor(ABC):
+    """Base class for content compression components."""
+
+    @abstractmethod
+    def compress(self, content: str, target_size: Optional[int] = None) -> str:
+        """
+        Compress content to reduce size while preserving key information.
+
+        Args:
+            content: The content to compress
+            target_size: Optional target size for the compressed content
+
+        Returns:
+            compressed_content: The compressed content
+        """
+        pass
efficient_context/compression/semantic_deduplicator.py
ADDED
@@ -0,0 +1,261 @@
+"""
+Semantic deduplication for compressing context content.
+"""
+
+import logging
+from typing import List, Optional, Tuple, Dict, Any
+
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+
+from efficient_context.compression.base import BaseCompressor
+from efficient_context.utils.text import split_into_sentences, get_sentence_importance
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class SemanticDeduplicator(BaseCompressor):
+    """
+    Compressor that removes semantically duplicate or redundant content.
+
+    This compressor identifies and removes sentences that are semantically
+    similar to others in the content, keeping only the most representative ones.
+    It's designed to be CPU-friendly and memory-efficient.
+    """
+
+    def __init__(
+        self,
+        threshold: float = 0.85,
+        embedding_model: str = "lightweight",
+        min_sentence_length: int = 10,
+        importance_weight: float = 0.3,
+    ):
+        """
+        Initialize the SemanticDeduplicator.
+
+        Args:
+            threshold: Similarity threshold for considering content duplicated (0.0 to 1.0)
+            embedding_model: The model to use for generating embeddings
+            min_sentence_length: Minimum length of sentences to consider
+            importance_weight: Weight given to sentence importance vs. deduplication
+        """
+        self.threshold = threshold
+        self.embedding_model = embedding_model
+        self.min_sentence_length = min_sentence_length
+        self.importance_weight = importance_weight
+
+        # Initialize the embedding model
+        self._init_embedding_model()
+
+        logger.info("SemanticDeduplicator initialized with threshold: %.2f", threshold)
+
+    def _init_embedding_model(self):
+        """Initialize the embedding model based on the selected type."""
+        try:
+            from sentence_transformers import SentenceTransformer
+
+            # Choose a lightweight model for CPU efficiency
+            if self.embedding_model == "lightweight":
+                # MiniLM models are lightweight and efficient
+                self.model = SentenceTransformer('paraphrase-MiniLM-L3-v2')
+            else:
+                # Default to a balanced model
+                self.model = SentenceTransformer(self.embedding_model)
+
+            logger.info("Using embedding model: %s", self.model.get_sentence_embedding_dimension())
+        except ImportError:
+            logger.warning("SentenceTransformer not available, using numpy fallback (less accurate)")
+            self.model = None
+
+    def _get_embeddings(self, sentences: List[str]) -> np.ndarray:
+        """
+        Get embeddings for a list of sentences.
+
+        Args:
+            sentences: List of sentences to embed
+
+        Returns:
+            embeddings: Array of sentence embeddings
+        """
+        if not sentences:
+            return np.array([])
+
+        if self.model is not None:
+            # Use the sentence transformer if available
+            return self.model.encode(sentences, show_progress_bar=False)
+        else:
+            # Fallback to a simple Bag-of-Words approach
+            # This is much less accurate but works without dependencies
+            from sklearn.feature_extraction.text import TfidfVectorizer
+            vectorizer = TfidfVectorizer(max_features=5000)
+            return vectorizer.fit_transform(sentences).toarray()
+
+    def _compute_similarity_matrix(self, embeddings: np.ndarray) -> np.ndarray:
+        """
+        Compute pairwise similarity between embeddings.
+
+        Args:
+            embeddings: Array of sentence embeddings
+
+        Returns:
+            similarity_matrix: Matrix of pairwise similarities
+        """
+        # Return empty array for empty input
+        if embeddings.shape[0] == 0:
+            return np.array([])
+
+        # Compute cosine similarity
+        return cosine_similarity(embeddings)
+
+    def _deduplicate_sentences(
+        self,
+        sentences: List[str],
+        importances: Optional[List[float]] = None
+    ) -> List[int]:
+        """
+        Identify non-redundant sentence indices.
+
+        Args:
+            sentences: List of sentences to deduplicate
+            importances: Optional list of importance scores
+
+        Returns:
+            kept_indices: Indices of sentences to keep
+        """
+        if not sentences:
+            return []
+
+        # Filter out sentences that are too short
+        valid_indices = [i for i, s in enumerate(sentences) if len(s.split()) >= self.min_sentence_length]
+
+        if not valid_indices:
+            # If no sentences meet the min length, return all indices
+            return list(range(len(sentences)))
+
+        # Get embeddings for valid sentences
+        valid_sentences = [sentences[i] for i in valid_indices]
+        embeddings = self._get_embeddings(valid_sentences)
+
+        # Compute pairwise similarity
+        similarity_matrix = self._compute_similarity_matrix(embeddings)
+
+        # Set diagonal to 0 to avoid self-similarity
+        np.fill_diagonal(similarity_matrix, 0)
+
+        # Determine which sentences to keep
+        kept_indices = []
+        remaining_indices = set(range(len(valid_indices)))
+
+        # If importances are provided, start with most important sentences
+        if importances is not None:
+            valid_importances = [importances[i] for i in valid_indices]
+            ordered_indices = [i for i, _ in sorted(
+                enumerate(valid_importances),
+                key=lambda x: x[1],
+                reverse=True
+            )]
+        else:
+            # Otherwise, use sentence length as a simple importance proxy
+            ordered_indices = [i for i, _ in sorted(
+                enumerate(valid_sentences),
+                key=lambda x: len(x[1].split()),
+                reverse=True
+            )]
+
+        # Process sentences in order of importance
+        for idx in ordered_indices:
+            if idx not in remaining_indices:
+                continue
+
+            # Keep this sentence
+            kept_indices.append(valid_indices[idx])
+            remaining_indices.remove(idx)
+
+            # Remove similar sentences
+            similar_indices = [
+                i for i in remaining_indices
+                if similarity_matrix[idx, i] > self.threshold
+            ]
+
+            remaining_indices -= set(similar_indices)
+
+            # Break if we've processed all indices
+            if not remaining_indices:
+                break
+
+        # Add any remaining short sentences we skipped earlier
+        short_indices = [i for i, s in enumerate(sentences) if len(s.split()) < self.min_sentence_length]
+        kept_indices.extend(short_indices)
+
+        # Sort to maintain original order
+        return sorted(kept_indices)
+
+    def compress(self, content: str, target_size: Optional[int] = None) -> str:
+        """
+        Compress content by removing semantic duplicates.
+
+        Args:
+            content: The content to compress
+            target_size: Optional target size in tokens
+
+        Returns:
+            compressed_content: The compressed content
+        """
+        # Split content into sentences
+        sentences = split_into_sentences(content)
+
+        if not sentences:
+            return content
+
+        # Get sentence importance scores
+        importances = get_sentence_importance(sentences)
+
+        # Deduplicate sentences
+        kept_indices = self._deduplicate_sentences(sentences, importances)
+
+        # Combine kept sentences
+        kept_sentences = [sentences[i] for i in kept_indices]
+        compressed = " ".join(kept_sentences)
+
+        # If we need to compress further to meet target size
+        if target_size and len(compressed.split()) > target_size:
+            # Calculate how many more sentences to remove
+            current_size = len(compressed.split())
+            reduction_needed = current_size - target_size
+
+            # Sort sentences by importance (lowest first)
+            sentence_priorities = [(i, importances[i]) for i in kept_indices]
+            sorted_priorities = sorted(sentence_priorities, key=lambda x: x[1])
+
+            # Remove least important sentences until we meet target size
+            remove_count = 0
+            tokens_removed = 0
+            indices_to_remove = []
+
+            for idx, _ in sorted_priorities:
+                sentence_tokens = len(sentences[idx].split())
+                tokens_removed += sentence_tokens
+                remove_count += 1
+                indices_to_remove.append(idx)
+
+                if tokens_removed >= reduction_needed:
+                    break
+
+            # Remove the low-importance sentences
+            final_indices = [i for i in kept_indices if i not in indices_to_remove]
+
+            # Recombine
+            compressed = " ".join(sentences[i] for i in sorted(final_indices))
+
+        # Log compression stats
+        original_tokens = len(content.split())
+        compressed_tokens = len(compressed.split())
+        reduction = (1 - compressed_tokens / original_tokens) * 100 if original_tokens > 0 else 0
+
+        logger.info(
+            "Compressed from %d to %d tokens (%.1f%% reduction)",
+            original_tokens, compressed_tokens, reduction
+        )
+
+        return compressed
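A short, hedged demonstration of the deduplicator above. It assumes `sentence-transformers` is installed (otherwise the TF-IDF fallback shown in the code is used, with different results); the sample text is illustrative, and which of the two near-duplicates survives depends on the embedding model and the importance scores.

```python
from efficient_context.compression import SemanticDeduplicator

dedup = SemanticDeduplicator(threshold=0.85)

# Sentences are kept above the default min_sentence_length of 10 words so
# all three take part in deduplication.
text = (
    "Renewable energy comes from sources that are naturally replenished on a human timescale. "
    "Energy counts as renewable when its source is naturally replenished on a human timescale. "
    "Installation costs for solar and wind have both fallen sharply over the last ten years."
)

# The two near-duplicate sentences embed very similarly, so only one of
# them should survive compression.
print(dedup.compress(text))
```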
efficient_context/context_manager.py
ADDED
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Core context management module for efficient-context library.
|
3 |
+
"""
|
4 |
+
|
5 |
+
from typing import List, Dict, Any, Optional, Union
|
6 |
+
import logging
|
7 |
+
from pydantic import BaseModel, Field
|
8 |
+
|
9 |
+
from efficient_context.compression.base import BaseCompressor
|
10 |
+
from efficient_context.chunking.base import BaseChunker
|
11 |
+
from efficient_context.retrieval.base import BaseRetriever
|
12 |
+
from efficient_context.memory.memory_manager import MemoryManager
|
13 |
+
|
14 |
+
# Set up logging
|
15 |
+
logging.basicConfig(level=logging.INFO)
|
16 |
+
logger = logging.getLogger(__name__)
|
17 |
+
|
18 |
+
class Document(BaseModel):
|
19 |
+
"""A document to be processed by the context manager."""
|
20 |
+
id: str = Field(..., description="Unique identifier for the document")
|
21 |
+
content: str = Field(..., description="Text content of the document")
|
22 |
+
metadata: Dict[str, Any] = Field(default_factory=dict, description="Optional metadata for the document")
|
23 |
+
|
24 |
+
class ContextManager:
|
25 |
+
"""
|
26 |
+
Main class for managing context efficiently for LLMs in CPU-constrained environments.
|
27 |
+
|
28 |
+
This class orchestrates the compression, chunking, retrieval, and memory management
|
29 |
+
components to optimize context handling for LLMs running on limited hardware.
|
30 |
+
"""
|
31 |
+
|
32 |
+
def __init__(
|
33 |
+
self,
|
34 |
+
compressor: Optional[BaseCompressor] = None,
|
35 |
+
chunker: Optional[BaseChunker] = None,
|
36 |
+
retriever: Optional[BaseRetriever] = None,
|
37 |
+
memory_manager: Optional[MemoryManager] = None,
|
38 |
+
max_context_size: int = 4096,
|
39 |
+
):
|
40 |
+
"""
|
41 |
+
Initialize the context manager with configurable components.
|
42 |
+
|
43 |
+
Args:
|
44 |
+
            compressor: Component for compressing context content
            chunker: Component for chunking content
            retriever: Component for retrieving relevant chunks
            memory_manager: Component for managing memory usage
            max_context_size: Maximum size of context in tokens
        """
        from efficient_context.compression import SemanticDeduplicator
        from efficient_context.chunking import SemanticChunker
        from efficient_context.retrieval import CPUOptimizedRetriever
        from efficient_context.memory import MemoryManager

        self.compressor = compressor or SemanticDeduplicator()
        self.chunker = chunker or SemanticChunker()
        self.retriever = retriever or CPUOptimizedRetriever()
        self.memory_manager = memory_manager or MemoryManager()
        self.max_context_size = max_context_size

        self.documents = {}
        self.chunks = []

        logger.info("Context Manager initialized with max context size: %d", max_context_size)

    def add_document(self, document: Union[Document, Dict, str], document_id: Optional[str] = None) -> str:
        """
        Add a document to the context manager.

        Args:
            document: Document to add (can be a Document object, dict, or string content)
            document_id: Optional ID for the document (generated if not provided)

        Returns:
            document_id: ID of the added document
        """
        # Convert input to Document object
        if isinstance(document, str):
            if document_id is None:
                import uuid
                document_id = str(uuid.uuid4())
            doc = Document(id=document_id, content=document)
        elif isinstance(document, dict):
            if 'id' in document:
                document_id = document['id']
            elif document_id is None:
                import uuid
                document_id = str(uuid.uuid4())

            doc = Document(
                id=document_id,
                content=document.get('content', ''),
                metadata=document.get('metadata', {})
            )
        else:
            doc = document
            document_id = doc.id

        # Store the document
        self.documents[document_id] = doc

        # Process the document
        with self.memory_manager.optimize_memory():
            # Compress the document
            compressed_content = self.compressor.compress(doc.content)

            # Chunk the compressed content
            doc_chunks = self.chunker.chunk(compressed_content, metadata=doc.metadata, document_id=doc.id)

            # Index the chunks for retrieval
            self.retriever.index_chunks(doc_chunks)

            # Store the chunks
            self.chunks.extend(doc_chunks)

        logger.info("Added document with ID %s (%d chunks)", document_id, len(doc_chunks))
        return document_id

    def add_documents(self, documents: List[Union[Document, Dict, str]]) -> List[str]:
        """
        Add multiple documents to the context manager.

        Args:
            documents: List of documents to add

        Returns:
            document_ids: List of IDs of added documents
        """
        document_ids = []
        for doc in documents:
            doc_id = self.add_document(doc)
            document_ids.append(doc_id)

        return document_ids

    def generate_context(self, query: str, max_size: Optional[int] = None) -> str:
        """
        Generate optimized context for a given query.

        Args:
            query: The query for which to generate context
            max_size: Maximum size of the context (defaults to self.max_context_size)

        Returns:
            context: Optimized context for the query
        """
        max_size = max_size or self.max_context_size

        with self.memory_manager.optimize_memory():
            # Retrieve relevant chunks
            relevant_chunks = self.retriever.retrieve(query, top_k=max_size)

            # Combine chunks into a context
            context_parts = [chunk.content for chunk in relevant_chunks]

            # Final compression to ensure we're within size limits
            combined_context = "\n\n".join(context_parts)
            if len(combined_context.split()) > max_size:
                combined_context = self.compressor.compress(combined_context, target_size=max_size)

        logger.info("Generated context of size ~%d tokens for query", len(combined_context.split()))
        return combined_context

    def clear(self):
        """Clear all documents and chunks from the context manager."""
        self.documents = {}
        self.chunks = []
        self.retriever.clear()
        logger.info("Context manager cleared")
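As the branches of add_document above show, a document can arrive in three shapes: a raw string (an ID is generated with uuid4), a dict (whose 'id', 'content', and 'metadata' keys are read), or a Document object (stored under its own id). A minimal sketch of the first two call forms, separate from the committed file; it assumes the constructor's other components fall back to their defaults as the `or` expressions above imply, and the IDs and text values are illustrative:

from efficient_context import ContextManager

cm = ContextManager(max_context_size=1024)

# Raw string: an ID is generated via uuid.uuid4() and returned.
id_a = cm.add_document("Renewable energy is replenished naturally.")

# Dict: 'id' is honored when present; 'content' and 'metadata' are read with defaults.
id_b = cm.add_document({
    "id": "doc-1",
    "content": "Wind power emits no carbon at the point of generation.",
    "metadata": {"source": "sketch"},
})
assert id_b == "doc-1"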
efficient_context/memory/__init__.py
ADDED
@@ -0,0 +1,7 @@
"""
Memory management components for efficient-context.
"""

from efficient_context.memory.memory_manager import MemoryManager

__all__ = ["MemoryManager"]
efficient_context/memory/memory_manager.py
ADDED
@@ -0,0 +1,134 @@
"""
Memory management utilities for efficient-context.
"""

import logging
import gc
import os
import psutil
from typing import Optional, Dict, Any
from contextlib import contextmanager

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class MemoryManager:
    """
    Manages memory usage for efficient context handling.

    This class provides utilities to monitor and optimize memory usage
    when working with large language models and context on CPU.
    """

    def __init__(
        self,
        target_usage_percent: float = 80.0,
        aggressive_cleanup: bool = False,
        memory_monitor_interval: Optional[float] = None,
    ):
        """
        Initialize the MemoryManager.

        Args:
            target_usage_percent: Target memory usage as percentage of available memory
            aggressive_cleanup: Whether to perform aggressive garbage collection
            memory_monitor_interval: Interval for memory monitoring in seconds (None to disable)
        """
        self.target_usage_percent = target_usage_percent
        self.aggressive_cleanup = aggressive_cleanup
        self.memory_monitor_interval = memory_monitor_interval
        self.monitor_active = False

        logger.info(
            "MemoryManager initialized with target usage: %.1f%%",
            target_usage_percent
        )

    def get_memory_usage(self) -> Dict[str, Any]:
        """
        Get current memory usage statistics.

        Returns:
            stats: Dictionary of memory usage statistics
        """
        # Get process memory info
        process = psutil.Process(os.getpid())
        process_memory = process.memory_info()

        # Get system memory info
        system_memory = psutil.virtual_memory()

        # Calculate usage percentages
        process_percent = (process_memory.rss / system_memory.total) * 100
        system_percent = system_memory.percent

        return {
            "process_rss_bytes": process_memory.rss,
            "process_vms_bytes": process_memory.vms,
            "process_percent": process_percent,
            "system_available_bytes": system_memory.available,
            "system_total_bytes": system_memory.total,
            "system_used_percent": system_percent,
        }

    def log_memory_usage(self) -> None:
        """Log memory usage statistics."""
        stats = self.get_memory_usage()

        logger.info(
            "Memory usage: Process: %.1f%% (%.1f MB), System: %.1f%% (%.1f GB available)",
            stats["process_percent"],
            stats["process_rss_bytes"] / (1024 * 1024),
            stats["system_used_percent"],
            stats["system_available_bytes"] / (1024 * 1024 * 1024)
        )

    def cleanup_memory(self) -> None:
        """Perform memory cleanup."""
        # Run garbage collection
        collected = gc.collect()

        if self.aggressive_cleanup:
            # Run an additional, more aggressive pass
            collected += gc.collect()

        logger.debug("Memory cleanup: Collected %d objects", collected)

    def _check_memory_threshold(self) -> bool:
        """
        Check if memory usage exceeds the target threshold.

        Returns:
            exceeded: Whether the threshold is exceeded
        """
        stats = self.get_memory_usage()
        return stats["system_used_percent"] > self.target_usage_percent

    @contextmanager
    def optimize_memory(self):
        """
        Context manager for optimizing memory during operations.

        Example:
            ```
            with memory_manager.optimize_memory():
                # Run memory-intensive operations
            ```
        """
        # Log initial memory state if in debug mode
        if logger.isEnabledFor(logging.DEBUG):
            self.log_memory_usage()

        try:
            # Yield control back to the caller
            yield
        finally:
            # Check if we need to clean up memory
            if self._check_memory_threshold():
                logger.info("Memory threshold exceeded, performing cleanup")
                self.cleanup_memory()

            # Log final memory state if in debug mode
            if logger.isEnabledFor(logging.DEBUG):
                self.log_memory_usage()
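A minimal standalone sketch of the MemoryManager API above, separate from the committed file; the bytearray workload is illustrative only:

from efficient_context.memory import MemoryManager

mm = MemoryManager(target_usage_percent=80.0, aggressive_cleanup=True)

# Snapshot of process and system memory, using the keys returned by get_memory_usage().
stats = mm.get_memory_usage()
print(f"Process RSS: {stats['process_rss_bytes'] / (1024 * 1024):.1f} MB")

# Guard a memory-intensive block; cleanup_memory() runs afterwards only if
# system usage exceeded target_usage_percent.
with mm.optimize_memory():
    buffers = [bytearray(1024 * 1024) for _ in range(64)]  # illustrative workload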
efficient_context/retrieval/__init__.py
ADDED
@@ -0,0 +1,8 @@
"""
Retrieval components for efficient-context.
"""

from efficient_context.retrieval.base import BaseRetriever
from efficient_context.retrieval.cpu_optimized_retriever import CPUOptimizedRetriever

__all__ = ["BaseRetriever", "CPUOptimizedRetriever"]
efficient_context/retrieval/base.py
ADDED
@@ -0,0 +1,40 @@
"""
Base classes for retrieval components.
"""

from abc import ABC, abstractmethod
from typing import List, Optional

from efficient_context.chunking.base import Chunk

class BaseRetriever(ABC):
    """Base class for content retrieval components."""

    @abstractmethod
    def index_chunks(self, chunks: List[Chunk]) -> None:
        """
        Index chunks for future retrieval.

        Args:
            chunks: Chunks to index
        """
        pass

    @abstractmethod
    def retrieve(self, query: str, top_k: Optional[int] = None) -> List[Chunk]:
        """
        Retrieve chunks relevant to a query.

        Args:
            query: Query to retrieve chunks for
            top_k: Number of chunks to retrieve

        Returns:
            chunks: List of retrieved chunks
        """
        pass

    @abstractmethod
    def clear(self) -> None:
        """Clear all indexed chunks."""
        pass
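BaseRetriever fixes the contract every retriever must satisfy: index_chunks, retrieve, and clear. A deliberately naive sketch of a custom implementation, separate from the commit; the class name is hypothetical, and it leans on the calculate_text_overlap helper from efficient_context/utils/text.py later in this commit as a stand-in similarity score:

from typing import List, Optional

from efficient_context.retrieval.base import BaseRetriever
from efficient_context.chunking.base import Chunk
from efficient_context.utils.text import calculate_text_overlap

class OverlapRetriever(BaseRetriever):
    """Toy retriever ranking chunks by token overlap with the query."""

    def __init__(self):
        self.chunks: List[Chunk] = []

    def index_chunks(self, chunks: List[Chunk]) -> None:
        self.chunks.extend(chunks)

    def retrieve(self, query: str, top_k: Optional[int] = None) -> List[Chunk]:
        top_k = top_k or 5
        # Rank by shared-token ratio between the query and each chunk's content.
        ranked = sorted(self.chunks,
                        key=lambda c: calculate_text_overlap(query, c.content),
                        reverse=True)
        return ranked[:top_k]

    def clear(self) -> None:
        self.chunks = []

Any such subclass can be dropped into ContextManager via its retriever parameter, since the manager only calls these three methods.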
efficient_context/retrieval/cpu_optimized_retriever.py
ADDED
@@ -0,0 +1,247 @@
"""
CPU-optimized retrieval for efficient context handling.
"""

import logging
import heapq
from typing import List, Dict, Any, Optional, Tuple, Union
import numpy as np

from efficient_context.retrieval.base import BaseRetriever
from efficient_context.chunking.base import Chunk

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class CPUOptimizedRetriever(BaseRetriever):
    """
    Retriever optimized for CPU performance and low memory usage.

    This retriever uses techniques to minimize computational requirements
    while still providing high-quality retrieval results.
    """

    def __init__(
        self,
        embedding_model: str = "lightweight",
        similarity_metric: str = "cosine",
        use_batching: bool = True,
        batch_size: int = 32,
        max_index_size: Optional[int] = None,
    ):
        """
        Initialize the CPUOptimizedRetriever.

        Args:
            embedding_model: Model to use for embeddings
            similarity_metric: Metric for comparing embeddings
            use_batching: Whether to batch embedding operations
            batch_size: Size of batches for embedding
            max_index_size: Maximum number of chunks to keep in the index
        """
        self.embedding_model = embedding_model
        self.similarity_metric = similarity_metric
        self.use_batching = use_batching
        self.batch_size = batch_size
        self.max_index_size = max_index_size

        # Initialize storage
        self.chunks = []
        self.chunk_embeddings = None
        self.chunk_ids_to_index = {}

        # Initialize the embedding model
        self._init_embedding_model()

        logger.info("CPUOptimizedRetriever initialized with model: %s", embedding_model)

    def _init_embedding_model(self):
        """Initialize the embedding model."""
        try:
            from sentence_transformers import SentenceTransformer

            # Choose a lightweight model for CPU efficiency
            if self.embedding_model == "lightweight":
                # MiniLM models are lightweight and efficient
                self.model = SentenceTransformer('paraphrase-MiniLM-L3-v2')
            else:
                # Default to a balanced model
                self.model = SentenceTransformer(self.embedding_model)

            logger.info("Using embedding model: %s", self.model.get_sentence_embedding_dimension())
        except ImportError:
            logger.warning("SentenceTransformer not available, using numpy fallback (less accurate)")
            self.model = None

    def _get_embeddings(self, texts: List[str]) -> np.ndarray:
        """
        Get embeddings for a list of texts.

        Args:
            texts: List of texts to embed

        Returns:
            embeddings: Array of text embeddings
        """
        if not texts:
            return np.array([])

        if self.model is not None:
            # Use the sentence transformer if available
            # Apply batching for memory efficiency
            if self.use_batching and len(texts) > self.batch_size:
                embeddings = []

                for i in range(0, len(texts), self.batch_size):
                    batch = texts[i:i+self.batch_size]
                    batch_embeddings = self.model.encode(
                        batch,
                        show_progress_bar=False,
                        convert_to_numpy=True
                    )
                    embeddings.append(batch_embeddings)

                return np.vstack(embeddings)
            else:
                return self.model.encode(texts, show_progress_bar=False)
        else:
            # Fallback to a simple Bag-of-Words approach
            from sklearn.feature_extraction.text import TfidfVectorizer
            vectorizer = TfidfVectorizer(max_features=5000)
            return vectorizer.fit_transform(texts).toarray()

    def _compute_similarities(self, query_embedding: np.ndarray, chunk_embeddings: np.ndarray) -> np.ndarray:
        """
        Compute similarities between query and chunk embeddings.

        Args:
            query_embedding: Embedding of the query
            chunk_embeddings: Embeddings of the chunks

        Returns:
            similarities: Array of similarity scores
        """
        if self.similarity_metric == "cosine":
            # Normalize the embeddings for cosine similarity
            query_norm = np.linalg.norm(query_embedding)
            if query_norm > 0:
                query_embedding = query_embedding / query_norm

            # Compute cosine similarity efficiently
            return np.dot(chunk_embeddings, query_embedding)
        elif self.similarity_metric == "dot":
            # Simple dot product
            return np.dot(chunk_embeddings, query_embedding)
        elif self.similarity_metric == "euclidean":
            # Negative Euclidean distance (higher is more similar)
            return -np.sqrt(np.sum((chunk_embeddings - query_embedding) ** 2, axis=1))
        else:
            # Default to cosine
            return np.dot(chunk_embeddings, query_embedding)

    def index_chunks(self, chunks: List[Chunk]) -> None:
        """
        Index chunks for future retrieval.

        Args:
            chunks: Chunks to index
        """
        if not chunks:
            return

        # Add new chunks
        for chunk in chunks:
            # Skip if chunk is already indexed
            if chunk.chunk_id in self.chunk_ids_to_index:
                continue

            self.chunks.append(chunk)
            self.chunk_ids_to_index[chunk.chunk_id] = len(self.chunks) - 1

        # Get embeddings for all chunks
        chunk_texts = [chunk.content for chunk in self.chunks]
        self.chunk_embeddings = self._get_embeddings(chunk_texts)

        # Apply dimensionality reduction if needed for memory efficiency
        if (self.max_index_size is not None and
            len(self.chunks) > self.max_index_size and
            self.model is not None):

            # Keep only the most recent chunks
            self.chunks = self.chunks[-self.max_index_size:]

            # Update the index mapping
            self.chunk_ids_to_index = {
                chunk.chunk_id: i for i, chunk in enumerate(self.chunks)
            }

            # Recalculate embeddings for the pruned set
            chunk_texts = [chunk.content for chunk in self.chunks]
            self.chunk_embeddings = self._get_embeddings(chunk_texts)

        # Normalize embeddings for cosine similarity
        if self.similarity_metric == "cosine" and self.chunk_embeddings is not None:
            # Compute norms of each embedding vector
            norms = np.linalg.norm(self.chunk_embeddings, axis=1, keepdims=True)

            # Avoid division by zero - normalize only where norm > 0
            non_zero_norms = norms > 0
            if np.any(non_zero_norms):
                # Directly normalize by dividing by norms (with keepdims=True, broadcasting works correctly)
                self.chunk_embeddings = np.where(
                    non_zero_norms,
                    self.chunk_embeddings / norms,
                    self.chunk_embeddings
                )

        logger.info("Indexed %d chunks (total: %d)", len(chunks), len(self.chunks))

    def retrieve(self, query: str, top_k: Optional[int] = None) -> List[Chunk]:
        """
        Retrieve chunks relevant to a query.

        Args:
            query: Query to retrieve chunks for
            top_k: Number of chunks to retrieve (default: 5)

        Returns:
            chunks: List of retrieved chunks
        """
        if not self.chunks:
            logger.warning("No chunks indexed for retrieval")
            return []

        if not query:
            logger.warning("Empty query provided")
            return []

        # Default top_k
        top_k = top_k or 5

        # Get query embedding
        query_embedding = self._get_embeddings([query])[0]

        # Compute similarities
        similarities = self._compute_similarities(query_embedding, self.chunk_embeddings)

        # Get indices of top-k most similar chunks
        if top_k >= len(similarities):
            top_indices = list(range(len(similarities)))
            top_indices.sort(key=lambda i: similarities[i], reverse=True)
        else:
            # More efficient partial sort for large indices
            top_indices = heapq.nlargest(top_k, range(len(similarities)), key=lambda i: similarities[i])

        # Get the corresponding chunks
        retrieved_chunks = [self.chunks[i] for i in top_indices]

        logger.info("Retrieved %d chunks for query", len(retrieved_chunks))
        return retrieved_chunks

    def clear(self) -> None:
        """Clear all indexed chunks."""
        self.chunks = []
        self.chunk_embeddings = None
        self.chunk_ids_to_index = {}
        logger.info("Cleared chunk index")
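A standalone sketch wiring this retriever to the SemanticChunker from earlier in this commit, separate from the committed files. It assumes sentence-transformers is installed so the lightweight MiniLM model loads (note the TF-IDF fallback refits per call, so query and chunk vectors would not share a space there); the sample text and IDs are illustrative:

from efficient_context.chunking import SemanticChunker
from efficient_context.retrieval import CPUOptimizedRetriever

chunker = SemanticChunker(chunk_size=128)
retriever = CPUOptimizedRetriever(embedding_model="lightweight", similarity_metric="cosine")

text = ("Solar and wind power are renewable sources. "
        "Fossil fuels are finite and emit greenhouse gases.")
# chunk() is called here with the same signature context_manager.py uses above.
chunks = chunker.chunk(text, metadata={"source": "sketch"}, document_id="doc-1")

retriever.index_chunks(chunks)
for chunk in retriever.retrieve("renewable power", top_k=2):
    print(chunk.content)

retriever.clear()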
efficient_context/utils/__init__.py
ADDED
@@ -0,0 +1,12 @@
"""
Utility functions for efficient-context.
"""

# Import utilities as needed
from efficient_context.utils.text import (
    split_into_sentences,
    get_sentence_importance,
    calculate_text_overlap
)

__all__ = ["split_into_sentences", "get_sentence_importance", "calculate_text_overlap"]
efficient_context/utils/text.py
ADDED
@@ -0,0 +1,120 @@
"""
Text processing utilities for the efficient-context library.
"""

import re
from typing import List, Dict, Any
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def split_into_sentences(text: str) -> List[str]:
    """
    Split text into sentences.

    Args:
        text: Text to split

    Returns:
        sentences: List of sentences
    """
    # Simple but effective sentence splitting
    # This handles most common sentence endings while preserving common abbreviations
    text = text.replace('\n', ' ')

    # Try to use NLTK if available for better sentence splitting
    try:
        import nltk
        try:
            return nltk.sent_tokenize(text)
        except Exception as e:
            logger.warning(f"NLTK sentence tokenizer error: {e}. Using fallback.")
            return _simple_sentence_split(text)
    except ImportError:
        logger.warning("NLTK not available, using fallback sentence splitter")
        return _simple_sentence_split(text)

def _simple_sentence_split(text: str) -> List[str]:
    """Fallback sentence splitter without dependencies."""
    # This is a simplified version, not as accurate as NLTK but works without dependencies
    # Handle common abbreviations to avoid splitting them
    for abbr in ['Mr.', 'Mrs.', 'Dr.', 'vs.', 'e.g.', 'i.e.', 'etc.']:
        text = text.replace(abbr, abbr.replace('.', '<POINT>'))

    # Split on sentence endings
    sentences = re.split(r'(?<=[.!?])\s+', text)

    # Restore abbreviations
    sentences = [s.replace('<POINT>', '.') for s in sentences]

    # Remove empty sentences
    return [s for s in sentences if s.strip()]

def get_sentence_importance(sentences: List[str]) -> List[float]:
    """
    Calculate importance scores for sentences based on heuristics.

    Args:
        sentences: List of sentences to score

    Returns:
        importances: List of importance scores (0.0 to 1.0)
    """
    # Simple heuristics for scoring sentence importance
    importances = []

    for sentence in sentences:
        score = 0.0
        words = sentence.split()

        # Longer sentences tend to be more informative (up to a point)
        length_score = min(len(words) / 20, 1.0)

        # Keywords suggest important content
        keyword_score = 0.0
        keywords = ['important', 'significant', 'key', 'critical', 'crucial',
                    'essential', 'main', 'major', 'primary', 'central',
                    'result', 'conclusion', 'finding', 'discovered', 'shows']

        for word in words:
            if word.lower() in keywords:
                keyword_score += 0.2

        keyword_score = min(keyword_score, 0.6)  # Cap keyword importance

        # Presence of numbers often indicates factual content
        number_score = 0.0
        if re.search(r'\d', sentence):
            number_score = 0.2

        # Combine scores
        score = 0.5 * length_score + 0.3 * keyword_score + 0.2 * number_score

        # Cap at 1.0
        importances.append(min(score, 1.0))

    return importances

def calculate_text_overlap(text1: str, text2: str) -> float:
    """
    Calculate simple text overlap between two strings.

    Args:
        text1: First text
        text2: Second text

    Returns:
        overlap_ratio: Ratio of shared tokens (0.0 to 1.0)
    """
    # Convert to sets of tokens
    tokens1 = set(text1.lower().split())
    tokens2 = set(text2.lower().split())

    # Calculate overlap
    if not tokens1 or not tokens2:
        return 0.0

    overlap = tokens1.intersection(tokens2)
    return len(overlap) / min(len(tokens1), len(tokens2))
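A quick demonstration of the three helpers above, separate from the committed file; the sample sentences are illustrative:

from efficient_context.utils.text import (
    split_into_sentences,
    get_sentence_importance,
    calculate_text_overlap,
)

text = "Dr. Smith reported a key result. Emissions fell 12% in 2023. The weather was nice."
sentences = split_into_sentences(text)  # 'Dr.' survives the fallback splitter

# Length, keyword hits (e.g. 'key'), and digits each raise the heuristic score.
for sentence, score in zip(sentences, get_sentence_importance(sentences)):
    print(f"{score:.2f}  {sentence}")

# Shared-token ratio relative to the smaller token set: {wind, power} over 3 tokens, i.e. ~0.67.
print(calculate_text_overlap("solar wind power", "wind power storage"))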
examples/basic_usage.py
ADDED
@@ -0,0 +1,92 @@
"""
Example usage of efficient-context library.
"""

import logging
from efficient_context import ContextManager
from efficient_context.compression import SemanticDeduplicator
from efficient_context.chunking import SemanticChunker
from efficient_context.retrieval import CPUOptimizedRetriever
from efficient_context.memory import MemoryManager

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def main():
    # Sample documents
    documents = [
        {
            "content": """
            Renewable energy is derived from natural sources that are replenished at a higher rate than they are consumed.
            Sunlight and wind, for example, are such sources that are constantly being replenished.
            Renewable energy resources exist over wide geographical areas, in contrast to fossil fuels,
            which are concentrated in a limited number of countries.

            Rapid deployment of renewable energy and energy efficiency technologies is resulting in significant
            energy security, climate change mitigation, and economic benefits.
            In international public opinion surveys there is strong support for promoting renewable sources
            such as solar power and wind power.

            While many renewable energy projects are large-scale, renewable technologies are also suited to rural
            and remote areas and developing countries, where energy is often crucial in human development.
            As most of the renewable energy technologies provide electricity, renewable energy is often deployed
            together with further electrification, which has several benefits: electricity can be converted to heat,
            can be converted into mechanical energy with high efficiency, and is clean at the point of consumption.
            """,
            "metadata": {"topic": "renewable energy", "source": "example"}
        },
        {
            "content": """
            Climate change mitigation consists of actions to limit global warming and its related effects.
            This involves reductions in human emissions of greenhouse gases (GHGs) as well as activities
            that reduce their concentration in the atmosphere.

            Fossil fuels account for more than 70% of GHG emissions. The energy sector contributes to global
            emissions, mainly through the burning of fossil fuels to generate electricity and heat,
            and through the use of gasoline and diesel to power vehicles.

            A transition to renewable energy is a key component of climate change mitigation. By replacing
            fossil fuel power plants with renewable energy sources, such as wind and solar, we can reduce
            the amount of greenhouse gases emitted into the atmosphere.

            Renewable energy can also play a role in adapting to climate change, for example by providing
            reliable power for cooling in increasingly hot regions, or by ensuring energy access in the
            aftermath of climate-related disasters.
            """,
            "metadata": {"topic": "climate change", "source": "example"}
        },
    ]

    # Initialize a context manager with custom strategies
    context_manager = ContextManager(
        compressor=SemanticDeduplicator(threshold=0.85),
        chunker=SemanticChunker(chunk_size=256),
        retriever=CPUOptimizedRetriever(embedding_model="lightweight"),
        memory_manager=MemoryManager(target_usage_percent=80.0),
        max_context_size=1024
    )

    # Add documents to the context manager
    document_ids = context_manager.add_documents(documents)

    # Query 1: Generate optimized context for a query
    query1 = "Tell me about the climate impact of renewable energy"
    print(f"\n\n=== QUERY: {query1} ===")
    optimized_context1 = context_manager.generate_context(query=query1)
    print(f"--- OPTIMIZED CONTEXT ({len(optimized_context1.split())} tokens) ---")
    print(optimized_context1)

    # Query 2: Different topic
    query2 = "How does renewable energy work in rural areas?"
    print(f"\n\n=== QUERY: {query2} ===")
    optimized_context2 = context_manager.generate_context(query=query2)
    print(f"--- OPTIMIZED CONTEXT ({len(optimized_context2.split())} tokens) ---")
    print(optimized_context2)

    # Example of using with an LLM (commented out since we don't have an actual LLM here)
    # response = your_llm_model.generate(prompt="Answer this question using the provided context.", context=optimized_context)
    # print(f"LLM Response: {response}")

if __name__ == "__main__":
    main()
examples/benchmark.py
ADDED
@@ -0,0 +1,209 @@
"""
Benchmarking script for efficient-context performance.
"""

import logging
import time
import argparse
import random
import string
import psutil
import os
import gc
from typing import List, Dict, Any

from efficient_context import ContextManager
from efficient_context.compression import SemanticDeduplicator
from efficient_context.chunking import SemanticChunker
from efficient_context.retrieval import CPUOptimizedRetriever

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def generate_random_text(words: int = 1000, paragraphs: int = 5) -> str:
    """
    Generate random text for benchmarking.

    Args:
        words: Number of words to generate
        paragraphs: Number of paragraphs to split the text into

    Returns:
        text: Generated random text
    """
    # List of common words for more realistic text
    common_words = [
        "the", "be", "to", "of", "and", "a", "in", "that", "have", "I",
        "it", "for", "not", "on", "with", "he", "as", "you", "do", "at",
        "this", "but", "his", "by", "from", "they", "we", "say", "her", "she",
        "or", "an", "will", "my", "one", "all", "would", "there", "their", "what",
        "so", "up", "out", "if", "about", "who", "get", "which", "go", "me",
        "renewable", "energy", "climate", "wind", "solar", "power", "change", "global",
        "sustainable", "resources", "efficiency", "emissions", "carbon", "technology"
    ]

    # Generate paragraphs
    result = []
    words_per_paragraph = words // paragraphs

    for i in range(paragraphs):
        paragraph_words = []
        for j in range(words_per_paragraph):
            # Occasionally add a random word for variety
            if random.random() < 0.1:
                word = ''.join(random.choice(string.ascii_lowercase) for _ in range(random.randint(3, 10)))
            else:
                word = random.choice(common_words)

            # Capitalize first word of sentence
            if j == 0 or paragraph_words[-1].endswith('.'):
                word = word.capitalize()

            # Add punctuation occasionally
            if j > 0 and j % random.randint(8, 15) == 0:
                word += '.'
            elif random.random() < 0.05:
                word += ','

            paragraph_words.append(word)

        # Ensure paragraph ends with period
        if not paragraph_words[-1].endswith('.'):
            paragraph_words[-1] += '.'

        result.append(' '.join(paragraph_words))

    return '\n\n'.join(result)

def get_memory_usage() -> Dict[str, Any]:
    """
    Get current memory usage.

    Returns:
        stats: Memory usage statistics
    """
    process = psutil.Process(os.getpid())
    memory_info = process.memory_info()

    return {
        "rss": memory_info.rss / (1024 * 1024),  # MB
        "vms": memory_info.vms / (1024 * 1024)   # MB
    }

def run_benchmark(
    num_documents: int = 10,
    words_per_document: int = 1000,
    num_queries: int = 5
) -> None:
    """
    Run a benchmark of efficient-context performance.

    Args:
        num_documents: Number of documents to process
        words_per_document: Number of words per document
        num_queries: Number of queries to run
    """
    logger.info(f"Starting benchmark with {num_documents} documents, {words_per_document} words each")

    # Initialize context manager
    context_manager = ContextManager(
        compressor=SemanticDeduplicator(threshold=0.85),
        chunker=SemanticChunker(chunk_size=256),
        retriever=CPUOptimizedRetriever(embedding_model="lightweight")
    )

    # Generate documents
    logger.info("Generating random documents...")
    documents = []
    for i in range(num_documents):
        content = generate_random_text(words=words_per_document, paragraphs=5)
        documents.append({
            "content": content,
            "metadata": {"id": f"doc-{i}", "source": "benchmark"}
        })

    # Measure document processing
    logger.info("Adding documents to context manager...")
    start_mem = get_memory_usage()
    start_time = time.time()

    document_ids = context_manager.add_documents(documents)

    end_time = time.time()
    end_mem = get_memory_usage()

    processing_time = end_time - start_time
    memory_increase = end_mem["rss"] - start_mem["rss"]

    logger.info(f"Document processing:")
    logger.info(f" - Time: {processing_time:.2f} seconds")
    logger.info(f" - Average per document: {processing_time / num_documents:.4f} seconds")
    logger.info(f" - Memory usage increase: {memory_increase:.2f} MB")
    logger.info(f" - Total chunks created: {len(context_manager.chunks)}")

    # Generate random queries
    logger.info("Generating context for queries...")
    queries = [
        f"Explain {random.choice(['renewable', 'sustainable', 'clean', 'alternative'])} energy",
        f"What are the {random.choice(['benefits', 'advantages', 'impacts', 'effects'])} of {random.choice(['solar', 'wind', 'hydro', 'geothermal'])} power?",
        f"How does {random.choice(['climate change', 'global warming', 'carbon emissions', 'greenhouse gases'])} affect the environment?",
        f"Discuss the {random.choice(['future', 'potential', 'limitations', 'challenges'])} of renewable energy",
        f"What is the {random.choice(['relationship', 'connection', 'link', 'correlation'])} between energy consumption and climate change?"
    ]

    # Ensure we have enough queries
    while len(queries) < num_queries:
        queries.append(f"Tell me about {random.choice(['energy', 'climate', 'sustainability', 'emissions'])}")

    # Select the requested number of queries
    selected_queries = random.sample(queries, min(num_queries, len(queries)))

    # Measure query processing
    total_query_time = 0
    total_query_tokens = 0

    for i, query in enumerate(selected_queries):
        # Clear some memory and cache before each query
        gc.collect()

        start_time = time.time()
        context = context_manager.generate_context(query)
        query_time = time.time() - start_time
        context_tokens = len(context.split())

        total_query_time += query_time
        total_query_tokens += context_tokens

        logger.info(f"Query {i+1}: '{query}'")
        logger.info(f" - Time: {query_time:.4f} seconds")
        logger.info(f" - Context size: {context_tokens} tokens")

    avg_query_time = total_query_time / num_queries
    avg_tokens = total_query_tokens / num_queries

    logger.info("\nBenchmark Summary:")
    logger.info(f" - Documents processed: {num_documents} ({words_per_document} words each)")
    logger.info(f" - Queries executed: {num_queries}")
    logger.info(f" - Document processing time: {processing_time:.2f} seconds ({processing_time / num_documents:.4f}s per document)")
    logger.info(f" - Average query time: {avg_query_time:.4f} seconds")
    logger.info(f" - Average context size: {avg_tokens:.1f} tokens")
    logger.info(f" - Final memory usage: {get_memory_usage()['rss']:.2f} MB")

def main():
    """Main function for the benchmark script."""
    parser = argparse.ArgumentParser(description="Benchmark efficient-context performance")
    parser.add_argument("--documents", type=int, default=10, help="Number of documents to process")
    parser.add_argument("--words", type=int, default=1000, help="Words per document")
    parser.add_argument("--queries", type=int, default=5, help="Number of queries to run")

    args = parser.parse_args()

    run_benchmark(
        num_documents=args.documents,
        words_per_document=args.words,
        num_queries=args.queries
    )

if __name__ == "__main__":
    main()
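The script needs only the library itself; the argparse flags above map one-to-one onto run_benchmark's parameters, so a typical invocation from the repository root (the values shown are arbitrary) is: python examples/benchmark.py --documents 20 --words 2000 --queries 5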
examples/dedup_benchmark.py
ADDED
@@ -0,0 +1,214 @@
#!/usr/bin/env python
"""
Specialized benchmark script for measuring the effectiveness of semantic deduplication
in the efficient-context library.
"""

import logging
import time
import argparse
import sys
from typing import List, Dict, Any

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
logger.info("Deduplication benchmark starting")

# Print Python and environment information
import platform
logger.info(f"Python version: {platform.python_version()}")
logger.info(f"Platform: {platform.platform()}")

# Import the library
try:
    from efficient_context import ContextManager
    from efficient_context.compression import SemanticDeduplicator
    from efficient_context.chunking import SemanticChunker
    from efficient_context.retrieval import CPUOptimizedRetriever
    logger.info("Successfully imported efficient_context")
except ImportError as e:
    logger.error(f"Failed to import efficient_context: {e}")
    sys.exit(1)

def generate_repetitive_document() -> str:
    """
    Generate a document with deliberate semantic repetition.
    The document will contain sentences that mean the same thing
    expressed in different ways.
    """
    # Base paragraphs with distinct topics
    base_paragraphs = [
        # Climate change paragraph with repetitive content
        """
        Climate change is a significant and lasting alteration in the statistical distribution of weather
        patterns over periods ranging from decades to millions of years. Global warming is the long-term
        heating of Earth's climate system observed since the pre-industrial period due to human activities.
        The rise in global temperature is causing substantial changes in our environment and ecosystems.
        The warming of the planet is leading to significant transformations in weather patterns worldwide.
        Human activities are causing Earth's temperature to increase, resulting in climate modifications.
        The climate crisis is fundamentally altering the Earth's atmosphere and affecting all living things.
        """,

        # Renewable energy paragraph with repetitive content
        """
        Renewable energy comes from sources that are naturally replenishing but flow-limited.
        Clean energy is derived from natural processes that are constantly replenished.
        Sustainable power is generated from resources that won't deplete over time.
        Green energy utilizes sources that don't produce pollution when generating power.
        Alternative energy refers to sources that are an alternative to fossil fuel.
        Eco-friendly power generation relies on inexhaustible natural resources.
        """,

        # Technology paragraph with repetitive content
        """
        Artificial intelligence is revolutionizing how we interact with technology.
        Machine learning is transforming the way computers process information.
        AI is fundamentally changing our relationship with digital systems.
        Smart algorithms are reshaping our technological landscape dramatically.
        Computational intelligence is altering how machines solve complex problems.
        Neural networks are revolutionizing the capabilities of modern computers.
        """
    ]

    # Repeat the paragraphs to create a longer document
    document = "\n\n".join(base_paragraphs * 3)
    return document

def generate_mixed_document() -> str:
    """
    Generate a document with a mix of repetitive and unique content.
    """
    repetitive = generate_repetitive_document()

    unique = """
    Energy efficiency is the goal to reduce the amount of energy required to provide products and services.
    For example, insulating a home allows a building to use less heating and cooling energy to achieve and
    maintain a comfortable temperature. Installing LED bulbs, fluorescent lighting, or natural skylights reduces
    the amount of energy required to attain the same level of illumination compared with using traditional
    incandescent light bulbs. Improvements in energy efficiency are generally achieved by adopting a more
    efficient technology or production process or by application of commonly accepted methods to reduce energy
    losses.

    Biodiversity is the variety and variability of life on Earth. It is typically a measure of variation at the
    genetic, species, and ecosystem level. Terrestrial biodiversity is usually greater near the equator, which is
    the result of the warm climate and high primary productivity. Biodiversity is not distributed evenly on Earth,
    and is richer in the tropics. These tropical forest ecosystems cover less than 10% of earth's surface, and
    contain about 90% of the world's species. Marine biodiversity is usually highest along coasts in the Western
    Pacific, where sea surface temperature is highest, and in the mid-latitudinal band in all oceans.
    """

    return repetitive + "\n\n" + unique

def run_deduplication_benchmark() -> None:
    """
    Run a benchmark specifically testing the semantic deduplication capabilities.
    """
    logger.info("Starting deduplication benchmark")

    # Initialize context manager with various thresholds
    thresholds = [0.7, 0.8, 0.85, 0.9, 0.95]
    results = []

    # Create documents
    repetitive_doc = generate_repetitive_document()
    mixed_doc = generate_mixed_document()

    logger.info(f"Repetitive document size: {len(repetitive_doc.split())} words")
    logger.info(f"Mixed document size: {len(mixed_doc.split())} words")

    for threshold in thresholds:
        logger.info(f"\nTesting with threshold: {threshold}")

        # Create a fresh context manager with the current threshold
        context_manager = ContextManager(
            compressor=SemanticDeduplicator(threshold=threshold),
            chunker=SemanticChunker(chunk_size=256),
            retriever=CPUOptimizedRetriever(embedding_model="lightweight")
        )

        # Test with repetitive document
        logger.info("Processing repetitive document...")
        start_time = time.time()
        doc_id = context_manager.add_document(repetitive_doc)
        processing_time = time.time() - start_time

        # Generate context with a relevant query to see compression in action
        query = "Tell me about climate change and renewable energy"
        start_time = time.time()
        context = context_manager.generate_context(query)
        query_time = time.time() - start_time

        # Record result
        result = {
            "threshold": threshold,
            "document_type": "repetitive",
            "original_size": len(repetitive_doc.split()),
            "context_size": len(context.split()),
            "processing_time": processing_time,
            "query_time": query_time,
            "chunks": len(context_manager.chunks)
        }
        results.append(result)
        logger.info(f" - Original size: {result['original_size']} words")
        logger.info(f" - Context size: {result['context_size']} words")
        logger.info(f" - Compression ratio: {result['context_size'] / result['original_size']:.2f}")
        logger.info(f" - Processing time: {result['processing_time']:.4f} seconds")
        logger.info(f" - Query time: {result['query_time']:.4f} seconds")

        # Reset the context manager
        context_manager = ContextManager(
            compressor=SemanticDeduplicator(threshold=threshold),
            chunker=SemanticChunker(chunk_size=256),
            retriever=CPUOptimizedRetriever(embedding_model="lightweight")
        )

        # Test with mixed document
        logger.info("Processing mixed document...")
        start_time = time.time()
        doc_id = context_manager.add_document(mixed_doc)
        processing_time = time.time() - start_time

        # Generate context with a relevant query
        query = "Tell me about climate change and biodiversity"
        start_time = time.time()
        context = context_manager.generate_context(query)
        query_time = time.time() - start_time

        # Record result
        result = {
            "threshold": threshold,
            "document_type": "mixed",
            "original_size": len(mixed_doc.split()),
            "context_size": len(context.split()),
            "processing_time": processing_time,
            "query_time": query_time,
            "chunks": len(context_manager.chunks)
        }
        results.append(result)
        logger.info(f" - Original size: {result['original_size']} words")
        logger.info(f" - Context size: {result['context_size']} words")
        logger.info(f" - Compression ratio: {result['context_size'] / result['original_size']:.2f}")
        logger.info(f" - Processing time: {result['processing_time']:.4f} seconds")
        logger.info(f" - Query time: {result['query_time']:.4f} seconds")

    # Print summary
    logger.info("\nDeduplication Benchmark Summary:")
    logger.info("-----------------------------------")

    logger.info("\nRepetitive Document Results:")
    for result in [r for r in results if r["document_type"] == "repetitive"]:
        logger.info(f"Threshold {result['threshold']}: {result['context_size'] / result['original_size']:.2f} compression ratio, {result['processing_time']:.4f}s processing time")

    logger.info("\nMixed Document Results:")
    for result in [r for r in results if r["document_type"] == "mixed"]:
        logger.info(f"Threshold {result['threshold']}: {result['context_size'] / result['original_size']:.2f} compression ratio, {result['processing_time']:.4f}s processing time")

def main():
    """Main function for the deduplication benchmark script."""
    parser = argparse.ArgumentParser(description="Benchmark efficient-context's semantic deduplication")

    args = parser.parse_args()
    run_deduplication_benchmark()

if __name__ == "__main__":
    main()
examples/dedup_eval.py
ADDED
@@ -0,0 +1,114 @@
#!/usr/bin/env python
"""
Advanced test for efficient-context's deduplication capabilities
"""

import time
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Import the library
from efficient_context import ContextManager
from efficient_context.compression import SemanticDeduplicator
from efficient_context.chunking import SemanticChunker
from efficient_context.retrieval import CPUOptimizedRetriever

def create_repetitive_document():
    """Create a document with highly repetitive semantic content"""

    # Create repetitive paragraphs with the same semantic meaning expressed differently
    paragraphs = []

    # Climate change variations
    climate_variations = [
        "Climate change is a significant alteration in global weather patterns over extended periods.",
        "Global warming refers to the long-term increase in Earth's average temperature.",
        "The climate crisis is causing significant shifts in temperature and precipitation patterns worldwide.",
        "Rising global temperatures lead to fundamental changes in our planet's climate systems.",
        "Human-induced warming of the Earth's atmosphere is resulting in climate destabilization."
    ]
    paragraphs.extend(climate_variations)

    # Renewable energy variations
    energy_variations = [
        "Renewable energy comes from natural sources that are constantly replenished.",
        "Clean energy technologies harness power from sustainable, non-depleting resources.",
        "Green power is generated from environmentally friendly, renewable sources.",
        "Sustainable energy is derived from resources that don't run out over time.",
        "Alternative energy refers to power sources that are alternatives to fossil fuels."
    ]
    paragraphs.extend(energy_variations)

    # Add some unique content as well
    unique_content = [
        "Machine learning algorithms require significant computational resources to train effectively.",
        "Biodiversity loss is accelerating at an unprecedented rate due to human activities.",
        "Quantum computing may revolutionize cryptography and computational chemistry."
    ]
    paragraphs.extend(unique_content)

    # Repeat the document to make it longer and more repetitive
    document = "\n\n".join(paragraphs * 3)  # Repeat 3 times
    return document

def run_deduplication_test():
    """Test the semantic deduplication capabilities"""
    logger.info("Running semantic deduplication test")

    # Create a highly repetitive document
    document = create_repetitive_document()
    logger.info(f"Document size: {len(document.split())} words")

    # Test with different threshold values
    thresholds = [0.7, 0.8, 0.85, 0.9, 0.95]

    for threshold in thresholds:
        logger.info(f"\nTesting threshold: {threshold}")

        # Create context manager with current threshold
        cm = ContextManager(
            compressor=SemanticDeduplicator(threshold=threshold),
            chunker=SemanticChunker(chunk_size=200),
            retriever=CPUOptimizedRetriever(embedding_model="lightweight")
        )

        # Add document and measure processing time
        start = time.time()
        doc_id = cm.add_document(document)
        processing_time = time.time() - start

        # Generate context for a relevant query
        query = "Explain the relationship between climate change and renewable energy"
        start = time.time()
        context = cm.generate_context(query)
        query_time = time.time() - start

        # Calculate metrics
        original_size = len(document.split())
        context_size = len(context.split())
        compression_ratio = context_size / original_size

        # Report results
        logger.info(f"Results for threshold {threshold}:")
        logger.info(f" - Original document: {original_size} words")
        logger.info(f" - Context generated: {context_size} words")
        logger.info(f" - Compression ratio: {compression_ratio:.2f}")
        logger.info(f" - Chunks created: {len(cm.chunks)}")
        logger.info(f" - Processing time: {processing_time:.4f} seconds")
        logger.info(f" - Query time: {query_time:.4f} seconds")

        # Print a preview of the context
        logger.info(f" - Context preview: {context[:150]}...")

if __name__ == "__main__":
    try:
        print("Starting deduplication evaluation...")
        run_deduplication_test()
        print("Evaluation completed successfully")
    except Exception as e:
        print(f"Error during evaluation: {e}")
        import traceback
        traceback.print_exc()
examples/dedup_test.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Manual benchmark for the SemanticDeduplicator component.
"""

import sys
import logging
from efficient_context.compression import SemanticDeduplicator

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def main():
    print("Testing SemanticDeduplicator")

    # Create a repetitive document with semantically similar sentences
    repetitive_text = """
    Climate change is a significant global challenge.
    Global warming is affecting ecosystems worldwide.
    The Earth's temperature is rising due to human activities.
    Climate change poses a serious threat to our planet.
    Rising global temperatures are causing environmental problems.

    Renewable energy is key to a sustainable future.
    Clean energy sources help reduce carbon emissions.
    Sustainable power generation is vital for fighting climate change.
    Green energy technologies are becoming more affordable.
    Renewable resources provide alternatives to fossil fuels.
    """

    print(f"Original text length: {len(repetitive_text.split())} words")

    # Test with different thresholds
    for threshold in [0.7, 0.8, 0.85, 0.9, 0.95]:
        print(f"\nTesting threshold: {threshold}")

        deduplicator = SemanticDeduplicator(threshold=threshold)

        # Apply deduplication
        compressed_text = deduplicator.compress(repetitive_text)

        print(f"Compressed text length: {len(compressed_text.split())} words")
        print(f"Compression ratio: {len(compressed_text.split()) / len(repetitive_text.split()):.2f}")

        # Print the first 100 characters of the compressed text
        print(f"Compressed text (preview): {compressed_text[:100]}...")

if __name__ == "__main__":
    main()
examples/deduplication_benchmark.py
ADDED
@@ -0,0 +1,277 @@
#!/usr/bin/env python
"""
Specialized benchmark script for measuring the effectiveness of semantic deduplication
in the efficient-context library.
"""

import logging
import time
import argparse
import sys
from typing import List, Dict, Any

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Import the library
try:
    from efficient_context import ContextManager
    from efficient_context.compression import SemanticDeduplicator
    from efficient_context.chunking import SemanticChunker
    from efficient_context.retrieval import CPUOptimizedRetriever
except ImportError as e:
    logger.error(f"Failed to import efficient_context: {e}")
    sys.exit(1)

def generate_repetitive_document() -> str:
    """
    Generate a document with deliberate semantic repetition.
    The document will contain sentences that mean the same thing
    expressed in different ways.
    """
    # Base paragraphs with distinct topics
    base_paragraphs = [
        # Climate change paragraph with repetitive content
        """
        Climate change is a significant and lasting alteration in the statistical distribution of weather
        patterns over periods ranging from decades to millions of years. Global warming is the long-term
        heating of Earth's climate system observed since the pre-industrial period due to human activities.
        The rise in global temperature is causing substantial changes in our environment and ecosystems.
        The warming of the planet is leading to significant transformations in weather patterns worldwide.
        Human activities are causing Earth's temperature to increase, resulting in climate modifications.
        The climate crisis is fundamentally altering the Earth's atmosphere and affecting all living things.
        """,

        # Renewable energy paragraph with repetitive content
        """
        Renewable energy comes from sources that are naturally replenishing but flow-limited.
        Clean energy is derived from natural processes that are constantly replenished.
        Sustainable power is generated from resources that won't deplete over time.
        Green energy utilizes sources that don't produce pollution when generating power.
        Alternative energy refers to sources that are an alternative to fossil fuel.
        Eco-friendly power generation relies on inexhaustible natural resources.
        """,

        # Technology paragraph with repetitive content
        """
        Artificial intelligence is revolutionizing how we interact with technology.
        Machine learning is transforming the way computers process information.
        AI is fundamentally changing our relationship with digital systems.
        Smart algorithms are reshaping our technological landscape dramatically.
        Computational intelligence is altering how machines solve complex problems.
        Neural networks are revolutionizing the capabilities of modern computers.
        """
    ]

    # Repeat the paragraphs to create a longer document
    document = "\n\n".join(base_paragraphs * 3)
    return document

def generate_mixed_document() -> str:
    """
    Generate a document with a mix of repetitive and unique content.
    """
    repetitive = generate_repetitive_document()

    unique = """
    Energy efficiency is the goal to reduce the amount of energy required to provide products and services.
    For example, insulating a home allows a building to use less heating and cooling energy to achieve and
    maintain a comfortable temperature. Installing LED bulbs, fluorescent lighting, or natural skylights reduces
    the amount of energy required to attain the same level of illumination compared with using traditional
    incandescent light bulbs. Improvements in energy efficiency are generally achieved by adopting a more
    efficient technology or production process or by application of commonly accepted methods to reduce energy
    losses.

    Biodiversity is the variety and variability of life on Earth. It is typically a measure of variation at the
    genetic, species, and ecosystem level. Terrestrial biodiversity is usually greater near the equator, which is
    the result of the warm climate and high primary productivity. Biodiversity is not distributed evenly on Earth,
    and is richer in the tropics. These tropical forest ecosystems cover less than 10% of earth's surface, and
    contain about 90% of the world's species. Marine biodiversity is usually highest along coasts in the Western
    Pacific, where sea surface temperature is highest, and in the mid-latitudinal band in all oceans.
    """

    return repetitive + "\n\n" + unique

def generate_repetitive_document() -> str:
    """
    Generate a document with deliberate semantic repetition.
    The document will contain sentences that mean the same thing
    expressed in different ways.
    """
    # Base paragraphs with distinct topics
    base_paragraphs = [
        # Climate change paragraph with repetitive content
        """
        Climate change is a significant and lasting alteration in the statistical distribution of weather
        patterns over periods ranging from decades to millions of years. Global warming is the long-term
        heating of Earth's climate system observed since the pre-industrial period due to human activities.
        The rise in global temperature is causing substantial changes in our environment and ecosystems.
        The warming of the planet is leading to significant transformations in weather patterns worldwide.
        Human activities are causing Earth's temperature to increase, resulting in climate modifications.
        The climate crisis is fundamentally altering the Earth's atmosphere and affecting all living things.
        """,

        # Renewable energy paragraph with repetitive content
        """
        Renewable energy comes from sources that are naturally replenishing but flow-limited.
        Clean energy is derived from natural processes that are constantly replenished.
        Sustainable power is generated from resources that won't deplete over time.
        Green energy utilizes sources that don't produce pollution when generating power.
        Alternative energy refers to sources that are an alternative to fossil fuel.
        Eco-friendly power generation relies on inexhaustible natural resources.
        """,

        # Technology paragraph with repetitive content
        """
        Artificial intelligence is revolutionizing how we interact with technology.
        Machine learning is transforming the way computers process information.
        AI is fundamentally changing our relationship with digital systems.
        Smart algorithms are reshaping our technological landscape dramatically.
        Computational intelligence is altering how machines solve complex problems.
        Neural networks are revolutionizing the capabilities of modern computers.
        """
    ]

    # Repeat the paragraphs to create a longer document
    document = "\n\n".join(base_paragraphs * 3)
    return document

def generate_mixed_document() -> str:
    """
    Generate a document with a mix of repetitive and unique content.
    """
    repetitive = generate_repetitive_document()

    unique = """
    Energy efficiency is the goal to reduce the amount of energy required to provide products and services.
    For example, insulating a home allows a building to use less heating and cooling energy to achieve and
    maintain a comfortable temperature. Installing LED bulbs, fluorescent lighting, or natural skylights reduces
    the amount of energy required to attain the same level of illumination compared with using traditional
    incandescent light bulbs. Improvements in energy efficiency are generally achieved by adopting a more
    efficient technology or production process or by application of commonly accepted methods to reduce energy
    losses.

    Biodiversity is the variety and variability of life on Earth. It is typically a measure of variation at the
    genetic, species, and ecosystem level. Terrestrial biodiversity is usually greater near the equator, which is
    the result of the warm climate and high primary productivity. Biodiversity is not distributed evenly on Earth,
    and is richer in the tropics. These tropical forest ecosystems cover less than 10% of earth's surface, and
    contain about 90% of the world's species. Marine biodiversity is usually highest along coasts in the Western
    Pacific, where sea surface temperature is highest, and in the mid-latitudinal band in all oceans.
    """

    return repetitive + "\n\n" + unique

def run_deduplication_benchmark() -> None:
    """
    Run a benchmark specifically testing the semantic deduplication capabilities.
    """
    logger.info("Starting deduplication benchmark")

    # Initialize context manager with various thresholds
    thresholds = [0.7, 0.8, 0.85, 0.9, 0.95]
    results = []

    # Create documents
    repetitive_doc = generate_repetitive_document()
    mixed_doc = generate_mixed_document()

    logger.info(f"Repetitive document size: {len(repetitive_doc.split())} words")
    logger.info(f"Mixed document size: {len(mixed_doc.split())} words")

    for threshold in thresholds:
        logger.info(f"\nTesting with threshold: {threshold}")

        # Create a fresh context manager with the current threshold
        context_manager = ContextManager(
            compressor=SemanticDeduplicator(threshold=threshold),
            chunker=SemanticChunker(chunk_size=256),
            retriever=CPUOptimizedRetriever(embedding_model="lightweight")
        )

        # Test with repetitive document
        logger.info("Processing repetitive document...")
        start_time = time.time()
        doc_id = context_manager.add_document(repetitive_doc)
        processing_time = time.time() - start_time

        # Generate context with a relevant query to see compression in action
        query = "Tell me about climate change and renewable energy"
        start_time = time.time()
        context = context_manager.generate_context(query)
        query_time = time.time() - start_time

        # Record result
        result = {
            "threshold": threshold,
            "document_type": "repetitive",
            "original_size": len(repetitive_doc.split()),
            "context_size": len(context.split()),
            "processing_time": processing_time,
            "query_time": query_time,
            "chunks": len(context_manager.chunks)
        }
        results.append(result)
        logger.info(f"  - Original size: {result['original_size']} words")
        logger.info(f"  - Context size: {result['context_size']} words")
        logger.info(f"  - Compression ratio: {result['context_size'] / result['original_size']:.2f}")
        logger.info(f"  - Processing time: {result['processing_time']:.4f} seconds")
        logger.info(f"  - Query time: {result['query_time']:.4f} seconds")

        # Reset the context manager
        context_manager = ContextManager(
            compressor=SemanticDeduplicator(threshold=threshold),
            chunker=SemanticChunker(chunk_size=256),
            retriever=CPUOptimizedRetriever(embedding_model="lightweight")
        )

        # Test with mixed document
        logger.info("Processing mixed document...")
        start_time = time.time()
        doc_id = context_manager.add_document(mixed_doc)
        processing_time = time.time() - start_time

        # Generate context with a relevant query
        query = "Tell me about climate change and biodiversity"
        start_time = time.time()
        context = context_manager.generate_context(query)
        query_time = time.time() - start_time

        # Record result
        result = {
            "threshold": threshold,
            "document_type": "mixed",
            "original_size": len(mixed_doc.split()),
            "context_size": len(context.split()),
            "processing_time": processing_time,
            "query_time": query_time,
            "chunks": len(context_manager.chunks)
        }
        results.append(result)
        logger.info(f"  - Original size: {result['original_size']} words")
        logger.info(f"  - Context size: {result['context_size']} words")
        logger.info(f"  - Compression ratio: {result['context_size'] / result['original_size']:.2f}")
        logger.info(f"  - Processing time: {result['processing_time']:.4f} seconds")
        logger.info(f"  - Query time: {result['query_time']:.4f} seconds")

    # Print summary
    logger.info("\nDeduplication Benchmark Summary:")
    logger.info("-----------------------------------")

    logger.info("\nRepetitive Document Results:")
    for result in [r for r in results if r["document_type"] == "repetitive"]:
        logger.info(f"Threshold {result['threshold']}: {result['context_size'] / result['original_size']:.2f} compression ratio, {result['processing_time']:.4f}s processing time")

    logger.info("\nMixed Document Results:")
    for result in [r for r in results if r["document_type"] == "mixed"]:
        logger.info(f"Threshold {result['threshold']}: {result['context_size'] / result['original_size']:.2f} compression ratio, {result['processing_time']:.4f}s processing time")

def main():
    """Main function for the deduplication benchmark script."""
    parser = argparse.ArgumentParser(description="Benchmark efficient-context's semantic deduplication")

    args = parser.parse_args()
    run_deduplication_benchmark()

if __name__ == "__main__":
    main()
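Note that `main()` above defines a parser but no options, so every run sweeps the full threshold list. A minimal, hypothetical extension (not part of this commit) could expose a single threshold as a CLI flag:

```python
import argparse

# Hypothetical sketch: expose the deduplication threshold as a command-line option.
parser = argparse.ArgumentParser(description="Benchmark efficient-context's semantic deduplication")
parser.add_argument("--threshold", type=float, default=0.85,
                    help="similarity threshold for SemanticDeduplicator")
args = parser.parse_args()
# run_deduplication_benchmark() could then be parameterized to use args.threshold.
```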
examples/demo_notebook.ipynb
ADDED
File without changes
examples/llm_integration.py
ADDED
@@ -0,0 +1,164 @@
"""
Example of integrating efficient-context with a lightweight LLM.
"""

import logging
import time
from typing import List, Dict, Any, Optional

from efficient_context import ContextManager
from efficient_context.compression import SemanticDeduplicator
from efficient_context.chunking import SemanticChunker
from efficient_context.retrieval import CPUOptimizedRetriever

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class LightweightLLM:
    """
    A simple wrapper for a lightweight LLM.

    This is a placeholder that would be replaced with an actual
    lightweight LLM implementation in a real application.
    """

    def __init__(self, model_name: str = "tiny-llm"):
        """
        Initialize the lightweight LLM.

        Args:
            model_name: Name of the model to use
        """
        self.model_name = model_name
        logger.info(f"Initialized LightweightLLM with model: {model_name}")

        # This would be where you'd load your model in a real implementation
        logger.info("Note: This is a placeholder class for demonstration purposes")

    def generate(
        self,
        prompt: str,
        context: Optional[str] = None,
        max_tokens: int = 512
    ) -> str:
        """
        Generate text using the LLM.

        Args:
            prompt: The prompt for generation
            context: Optional context to condition the generation
            max_tokens: Maximum number of tokens to generate

        Returns:
            response: Generated text response
        """
        # This is a fake implementation for demonstration
        # In a real application, you'd call your LLM here

        logger.info(f"Generating response with context size: {len(context.split()) if context else 0} tokens")

        # Simulate generation time based on context size
        if context:
            time.sleep(0.001 * len(context.split()))  # Simulate processing time

            # Simple keyword detection for demo purposes
            if "renewable energy" in context and "climate" in context:
                return "Renewable energy has a positive impact on climate change mitigation by reducing greenhouse gas emissions. The transition from fossil fuels to renewable sources like wind and solar is crucial for limiting global warming."
            elif "rural" in context and "renewable" in context:
                return "Renewable energy technologies are well-suited for rural and remote areas. They can provide decentralized power generation, improving energy access in areas without reliable grid connections, which is critical for human development."
            else:
                return "Renewable energy sources are sustainable alternatives to fossil fuels. They include solar, wind, hydro, geothermal, and biomass energy, and their use is growing globally."
        else:
            return "I don't have enough context to provide a detailed answer on this topic."

def main():
    # Sample documents - in a real application, you might load these from files
    documents = [
        {
            "content": """
            Renewable energy is derived from natural sources that are replenished at a higher rate than they are consumed.
            Sunlight and wind, for example, are such sources that are constantly being replenished.
            Renewable energy resources exist over wide geographical areas, in contrast to fossil fuels,
            which are concentrated in a limited number of countries.

            Rapid deployment of renewable energy and energy efficiency technologies is resulting in significant
            energy security, climate change mitigation, and economic benefits.
            In international public opinion surveys there is strong support for promoting renewable sources
            such as solar power and wind power.

            While many renewable energy projects are large-scale, renewable technologies are also suited to rural
            and remote areas and developing countries, where energy is often crucial in human development.
            As most of the renewable energy technologies provide electricity, renewable energy is often deployed
            together with further electrification, which has several benefits: electricity can be converted to heat,
            can be converted into mechanical energy with high efficiency, and is clean at the point of consumption.
            """,
            "metadata": {"topic": "renewable energy", "source": "example"}
        },
        {
            "content": """
            Climate change mitigation consists of actions to limit global warming and its related effects.
            This involves reductions in human emissions of greenhouse gases (GHGs) as well as activities
            that reduce their concentration in the atmosphere.

            Fossil fuels account for more than 70% of GHG emissions. The energy sector contributes to global
            emissions, mainly through the burning of fossil fuels to generate electricity and heat,
            and through the use of gasoline and diesel to power vehicles.

            A transition to renewable energy is a key component of climate change mitigation. By replacing
            fossil fuel power plants with renewable energy sources, such as wind and solar, we can reduce
            the amount of greenhouse gases emitted into the atmosphere.

            Renewable energy can also play a role in adapting to climate change, for example by providing
            reliable power for cooling in increasingly hot regions, or by ensuring energy access in the
            aftermath of climate-related disasters.
            """,
            "metadata": {"topic": "climate change", "source": "example"}
        },
    ]

    # Initialize a context manager with custom strategies
    context_manager = ContextManager(
        compressor=SemanticDeduplicator(threshold=0.85),
        chunker=SemanticChunker(chunk_size=256),
        retriever=CPUOptimizedRetriever(embedding_model="lightweight"),
        max_context_size=512  # Intentionally small for demonstration
    )

    # Initialize a lightweight LLM
    llm = LightweightLLM()

    # Add documents to the context manager
    document_ids = context_manager.add_documents(documents)

    # Example queries
    queries = [
        "Tell me about the climate impact of renewable energy",
        "How does renewable energy work in rural areas?",
        "What are the advantages of using renewable energy?"
    ]

    # Process each query
    for query in queries:
        print(f"\n\n=== QUERY: {query} ===")

        # Generate optimized context for the query
        start_time = time.time()
        optimized_context = context_manager.generate_context(query=query)
        context_time = time.time() - start_time

        print(f"Context generation took {context_time:.3f} seconds")
        print(f"Context size: {len(optimized_context.split())} tokens")

        # Generate response using the LLM with the optimized context
        start_time = time.time()
        response = llm.generate(prompt=query, context=optimized_context)
        llm_time = time.time() - start_time

        print(f"LLM generation took {llm_time:.3f} seconds")
        print(f"--- RESPONSE ---")
        print(response)
        print("-" * 50)

if __name__ == "__main__":
    main()
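Because `LightweightLLM` is an explicit placeholder, swapping in a real backend only requires overriding `generate()`; the context pipeline stays unchanged. A minimal sketch of that pattern (hypothetical, no real model involved, reusing the `LightweightLLM` class defined above):

```python
# Hypothetical sketch: a drop-in replacement that "answers" by extracting from the context.
class ExtractiveLLM(LightweightLLM):
    def generate(self, prompt, context=None, max_tokens=512):
        if not context:
            return "I don't have enough context to answer."
        # Return the first sentence of the retrieved context as the "response".
        return context.split(".")[0].strip() + "."

llm = ExtractiveLLM()  # usable anywhere main() uses LightweightLLM()
```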
examples/simple_dedup_benchmark.py
ADDED
@@ -0,0 +1,92 @@
#!/usr/bin/env python
"""
Simple benchmark for efficient-context's semantic deduplication.
"""

import logging
import time
import sys

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
logger.info("Simple deduplication benchmark starting")

# Import the library
try:
    from efficient_context import ContextManager
    from efficient_context.compression import SemanticDeduplicator
    from efficient_context.chunking import SemanticChunker
    from efficient_context.retrieval import CPUOptimizedRetriever
    logger.info("Successfully imported efficient_context")
except ImportError as e:
    logger.error(f"Failed to import efficient_context: {e}")
    sys.exit(1)

def create_repetitive_document():
    """Create a document with deliberate repetition"""
    # Base paragraphs with repetitive content
    climate_paragraph = """
    Climate change is a significant alteration in weather patterns over extended periods.
    Global warming is the long-term heating of Earth's climate system due to human activities.
    Rising global temperatures are causing substantial changes in our environment and ecosystems.
    The warming of the planet is leading to significant transformations in weather patterns.
    Human activities are causing Earth's temperature to increase, resulting in climate changes.
    """

    energy_paragraph = """
    Renewable energy comes from sources that are naturally replenishing but flow-limited.
    Clean energy is derived from natural processes that are constantly replenished.
    Sustainable power is generated from resources that won't deplete over time.
    Green energy utilizes sources that don't produce pollution when generating power.
    Alternative energy refers to sources that are an alternative to fossil fuel.
    """

    # Repeat the paragraphs to create a more repetitive document
    document = (climate_paragraph + energy_paragraph) * 3
    return document

def main():
    """Run the benchmark"""
    # Create the test document
    document = create_repetitive_document()
    logger.info(f"Document size: {len(document.split())} words")

    # Test with different thresholds
    thresholds = [0.7, 0.8, 0.85, 0.9, 0.95]

    for threshold in thresholds:
        logger.info(f"\nTesting with threshold: {threshold}")

        # Create a context manager with the current threshold
        context_manager = ContextManager(
            compressor=SemanticDeduplicator(threshold=threshold),
            chunker=SemanticChunker(chunk_size=100),
            retriever=CPUOptimizedRetriever(embedding_model="lightweight")
        )

        # Process the document
        start_time = time.time()
        doc_id = context_manager.add_document(document)
        processing_time = time.time() - start_time

        # Generate context with a query
        query = "Tell me about climate change and renewable energy"
        start_time = time.time()
        context = context_manager.generate_context(query)
        query_time = time.time() - start_time

        # Report results
        original_size = len(document.split())
        context_size = len(context.split())
        compression_ratio = context_size / original_size if original_size > 0 else 1.0

        logger.info(f"Results for threshold {threshold}:")
        logger.info(f"  - Original size: {original_size} words")
        logger.info(f"  - Context size: {context_size} words")
        logger.info(f"  - Compression ratio: {compression_ratio:.2f}")
        logger.info(f"  - Processing time: {processing_time:.4f} seconds")
        logger.info(f"  - Query time: {query_time:.4f} seconds")

if __name__ == "__main__":
    main()
examples/simple_test.py
ADDED
@@ -0,0 +1,69 @@
#!/usr/bin/env python
"""
Basic test for efficient-context
"""

import os
import sys
import time

print(f"Python version: {sys.version}")
print(f"Current directory: {os.getcwd()}")
print(f"Python path: {sys.path}")

try:
    print("Testing efficient-context library...")

    # Create a simple context manager
    from efficient_context import ContextManager
    from efficient_context.compression import SemanticDeduplicator
    from efficient_context.chunking import SemanticChunker
    from efficient_context.retrieval import CPUOptimizedRetriever

    print("Successfully imported efficient_context")
except Exception as e:
    print(f"Error importing efficient_context: {e}")
    sys.exit(1)

cm = ContextManager(
    compressor=SemanticDeduplicator(threshold=0.85),
    chunker=SemanticChunker(chunk_size=200),
    retriever=CPUOptimizedRetriever(embedding_model="lightweight")
)

# Add a document
doc = """
Renewable energy comes from sources that are naturally replenishing but flow-limited.
Clean energy is derived from natural processes that are constantly replenished.
Sustainable power is generated from resources that won't deplete over time.
Green energy utilizes sources that don't produce pollution when generating power.
Alternative energy refers to sources that are an alternative to fossil fuel.
Eco-friendly power generation relies on inexhaustible natural resources.

Climate change is a significant and lasting alteration in the statistical distribution
of weather patterns over periods ranging from decades to millions of years.
Global warming is the long-term heating of Earth's climate system observed since
the pre-industrial period due to human activities.
"""

print(f"Document size: {len(doc.split())} words")

# Add the document
start = time.time()
doc_id = cm.add_document(doc)
processing_time = time.time() - start
print(f"Document processed in {processing_time:.4f} seconds")
print(f"Created {len(cm.chunks)} chunks")

# Generate context
query = "Tell me about renewable energy"
start = time.time()
context = cm.generate_context(query)
query_time = time.time() - start

# Print results
print(f"Query time: {query_time:.4f} seconds")
print(f"Context size: {len(context.split())} words")
print(f"Context: {context[:150]}...")

print("Test completed successfully")
model_card.md
ADDED
@@ -0,0 +1,91 @@
# efficient-context

A Python library for optimizing LLM context handling in CPU-constrained environments.

## Model / Library Description

`efficient-context` addresses the challenge of working with large language models (LLMs) on CPU-only and memory-limited systems by providing efficient context management strategies. The library focuses on making LLMs more usable when computational resources are limited.

## Intended Use

This library is designed for:
- Deploying LLMs in resource-constrained environments
- Optimizing context handling for edge devices
- Creating applications that need to run on standard hardware
- Reducing memory usage when working with large documents

## Features

### Context Compression
- Semantic deduplication to remove redundant information
- Importance-based pruning that keeps critical information
- Automatic summarization of less relevant sections

### Advanced Chunking
- Semantic chunking that preserves logical units
- Adaptive chunk sizing based on content complexity
- Chunk relationship mapping for coherent retrieval

### Retrieval Optimization
- Lightweight embedding models optimized for CPU
- Tiered retrieval strategies (local vs. remote)
- Query-aware context assembly

### Memory Management
- Progressive loading/unloading of context
- Streaming context processing
- Memory-aware caching strategies
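The compression features can also be used on their own. As a minimal sketch (mirroring `examples/dedup_test.py` in this commit), the `SemanticDeduplicator` compresses a redundant passage directly, with `threshold` controlling how aggressively near-duplicate sentences are dropped:

```python
from efficient_context.compression import SemanticDeduplicator

# Lower thresholds treat more sentence pairs as duplicates and compress harder.
deduplicator = SemanticDeduplicator(threshold=0.85)
compressed = deduplicator.compress(
    "Solar power is a renewable source. Solar energy is renewable."
)
print(compressed)
```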
## Installation

```bash
pip install efficient-context
```

## Usage

```python
from efficient_context import ContextManager
from efficient_context.compression import SemanticDeduplicator
from efficient_context.chunking import SemanticChunker
from efficient_context.retrieval import CPUOptimizedRetriever

# Initialize a context manager with custom strategies
context_manager = ContextManager(
    compressor=SemanticDeduplicator(threshold=0.85),
    chunker=SemanticChunker(chunk_size=256),
    retriever=CPUOptimizedRetriever(embedding_model="lightweight")
)

# Add documents to your context
context_manager.add_documents(documents)

# Generate optimized context for a query
optimized_context = context_manager.generate_context(
    query="Tell me about the climate impact of renewable energy"
)

# Use the optimized context with your LLM
response = your_llm_model.generate(prompt=prompt, context=optimized_context)
```

## Performance and Benchmarks

The library has demonstrated excellent performance in handling repetitive content:
- With a threshold of 0.7, it achieved a 57.5% reduction in token count
- Processing times: 0.13-0.84 seconds for a 426-word document
- Query time: 0.08-0.14 seconds

## Limitations

- Designed primarily for text data
- Performance depends on the quality of embedding models
- Semantic deduplication may occasionally remove content that appears similar but has subtle differences

## Maintainer

This project is maintained by [Biswanath Roul](https://github.com/biswanathroul)

## License

MIT
pyproject.toml
ADDED
@@ -0,0 +1,14 @@
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"

[tool.black]
line-length = 88
include = '\.pyi?$'

[tool.isort]
profile = "black"
line_length = 88

[tool.pytest.ini_options]
testpaths = ["tests"]
requirements.txt
ADDED
@@ -0,0 +1,7 @@
numpy>=1.19.0
scikit-learn>=0.24.0
sentence-transformers>=2.2.2
nltk>=3.6.0
pydantic>=1.8.0
tqdm>=4.62.0
psutil>=5.9.0
setup.py
ADDED
@@ -0,0 +1,31 @@
from setuptools import setup, find_packages

with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setup(
    name="efficient-context",
    version="0.1.0",
    author="Biswanath Roul",
    description="Optimize LLM context handling in CPU-constrained environments",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/biswanathroul/efficient-context",
    packages=find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
    python_requires=">=3.7",
    install_requires=[
        "numpy>=1.19.0",
        "scikit-learn>=0.24.0",
        "sentence-transformers>=2.2.2",
        "nltk>=3.6.0",
        "pydantic>=1.8.0",
        "tqdm>=4.62.0",
        "psutil>=5.9.0",  # kept in sync with requirements.txt
    ],
    keywords="llm, context, optimization, cpu, memory, efficiency, nlp",
)
test_simple.py
ADDED
@@ -0,0 +1,75 @@
"""
Simple test script for efficient-context library.
"""

import logging
from efficient_context import ContextManager
from efficient_context.compression import SemanticDeduplicator
from efficient_context.chunking import SemanticChunker
from efficient_context.retrieval import CPUOptimizedRetriever

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def test_basic_functionality():
    """Test the basic functionality of the library."""
    print("\n=== Testing Basic Functionality ===")

    # Sample document - notice we've removed indentation and added more content
    document = """Renewable energy is derived from natural sources that are replenished at a higher rate than they are consumed.
Sunlight and wind, for example, are such sources that are constantly being replenished.
Renewable energy resources exist over wide geographical areas, in contrast to fossil fuels,
which are concentrated in a limited number of countries.

Rapid deployment of renewable energy and energy efficiency technologies is resulting in significant
energy security, climate change mitigation, and economic benefits.
In international public opinion surveys there is strong support for promoting renewable sources
such as solar power and wind power.

While many renewable energy projects are large-scale, renewable technologies are also suited to rural
and remote areas and developing countries, where energy is often crucial in human development.
As most of the renewable energy technologies provide electricity, renewable energy is often deployed
together with further electrification, which has several benefits."""

    # Initialize context manager
    context_manager = ContextManager(
        compressor=SemanticDeduplicator(threshold=0.85),
        chunker=SemanticChunker(chunk_size=100),
        retriever=CPUOptimizedRetriever(embedding_model="lightweight")
    )

    # Add document
    print(f"Document length: {len(document.split())} words")
    doc_id = context_manager.add_document(document)
    print(f"Added document with ID: {doc_id}")
    print(f"Created {len(context_manager.chunks)} chunks")

    # Debug information about chunks
    if len(context_manager.chunks) > 0:
        print("\nChunk information:")
        for i, chunk in enumerate(context_manager.chunks):
            print(f"Chunk {i+1}: {len(chunk.content.split())} words")
            print(f"Content sample: {chunk.content[:50]}...")
    else:
        print("\nWARNING: No chunks were created. This is likely an issue with the chunker.")
        # Let's try direct chunking to debug
        print("\nTrying direct chunking:")
        chunks = context_manager.chunker.chunk(document, document_id=doc_id)
        print(f"Direct chunking created {len(chunks)} chunks")
        if len(chunks) > 0:
            print(f"Sample chunk content: {chunks[0].content[:50]}...")

    # Test query
    query = "Tell me about renewable energy sources"
    print(f"\nQuery: {query}")

    # Get context
    context = context_manager.generate_context(query)
    print(f"\nGenerated context ({len(context.split())} tokens):")
    print(context)

    print("\n=== Test completed successfully ===")

if __name__ == "__main__":
    test_basic_functionality()
tests/test_core.py
ADDED
@@ -0,0 +1,114 @@
"""
Tests for the core functionality of efficient-context.
"""

import unittest
from efficient_context import ContextManager
from efficient_context.compression import SemanticDeduplicator
from efficient_context.chunking import SemanticChunker, Chunk
from efficient_context.retrieval import CPUOptimizedRetriever
from efficient_context.memory import MemoryManager

class TestEfficientContext(unittest.TestCase):
    """Test cases for efficient-context functionality."""

    def setUp(self):
        """Set up test environment."""
        self.sample_text = """
        Renewable energy is derived from natural sources that are replenished at a higher rate than they are consumed.
        Sunlight and wind, for example, are such sources that are constantly being replenished.
        Renewable energy resources exist over wide geographical areas, in contrast to fossil fuels,
        which are concentrated in a limited number of countries.

        Rapid deployment of renewable energy and energy efficiency technologies is resulting in significant
        energy security, climate change mitigation, and economic benefits.
        In international public opinion surveys there is strong support for promoting renewable sources
        such as solar power and wind power.

        While many renewable energy projects are large-scale, renewable technologies are also suited to rural
        and remote areas and developing countries, where energy is often crucial in human development.
        As most of the renewable energy technologies provide electricity, renewable energy is often deployed
        together with further electrification, which has several benefits: electricity can be converted to heat,
        can be converted into mechanical energy with high efficiency, and is clean at the point of consumption.
        """

    def test_semantic_deduplicator(self):
        """Test the semantic deduplicator functionality."""
        compressor = SemanticDeduplicator(threshold=0.9)
        compressed = compressor.compress(self.sample_text)

        # Test that compression reduces size
        self.assertLess(len(compressed), len(self.sample_text))

        # Test that key content is preserved
        self.assertIn("Renewable energy", compressed)

    def test_semantic_chunker(self):
        """Test the semantic chunker functionality."""
        chunker = SemanticChunker(chunk_size=100, chunk_overlap=10)
        chunks = chunker.chunk(self.sample_text, document_id="test-doc")

        # Test that chunks were created
        self.assertGreater(len(chunks), 0)

        # Test that each chunk has content and metadata
        for chunk in chunks:
            self.assertIsInstance(chunk, Chunk)
            self.assertTrue(chunk.content)
            self.assertEqual(chunk.document_id, "test-doc")

    def test_cpu_optimized_retriever(self):
        """Test the CPU-optimized retriever functionality."""
        retriever = CPUOptimizedRetriever(embedding_model="lightweight")

        # Create test chunks
        chunks = [
            Chunk(content="Renewable energy is a sustainable energy source.", chunk_id="1"),
            Chunk(content="Climate change is a global challenge.", chunk_id="2"),
            Chunk(content="Fossil fuels contribute to greenhouse gas emissions.", chunk_id="3")
        ]

        # Index chunks
        retriever.index_chunks(chunks)

        # Test retrieval
        query = "What are the environmental impacts of energy sources?"
        results = retriever.retrieve(query, top_k=2)

        # Should return some results
        self.assertEqual(len(results), 2)

        # Clear index
        retriever.clear()
        self.assertEqual(len(retriever.chunks), 0)

    def test_context_manager_integration(self):
        """Test full integration of all components."""
        # Initialize context manager
        context_manager = ContextManager(
            compressor=SemanticDeduplicator(threshold=0.85),
            chunker=SemanticChunker(chunk_size=100),
            retriever=CPUOptimizedRetriever(embedding_model="lightweight"),
            memory_manager=MemoryManager()
        )

        # Add document
        doc_id = context_manager.add_document(self.sample_text)

        # Test document was added
        self.assertIn(doc_id, context_manager.documents)

        # Test context generation
        query = "Tell me about renewable energy in rural areas"
        context = context_manager.generate_context(query)

        # Should return some context
        self.assertTrue(context)

        # Clear context manager
        context_manager.clear()
        self.assertEqual(len(context_manager.documents), 0)
        self.assertEqual(len(context_manager.chunks), 0)

if __name__ == "__main__":
    unittest.main()