# src/embedding_generator/embedder.py
"""Wrapper around the Ollama embedding model configured in ``config.settings``."""

import logging
from typing import List

from langchain_ollama import OllamaEmbeddings  # cite: embed_pipeline.py, query_pipeline.py

from config.settings import OLLAMA_URL, EMBED_MODEL

logger = logging.getLogger(__name__)


class EmbeddingGenerator:
    """Manage the embedding model and generate embeddings.

    Attributes:
        embedder: The ``OllamaEmbeddings`` instance built from ``OLLAMA_URL``
            and ``EMBED_MODEL``; every embedding call is delegated to it.
    """

    def __init__(self):
        """Create the underlying ``OllamaEmbeddings`` client.

        The server URL and model name come from ``config.settings``
        (``OLLAMA_URL`` and ``EMBED_MODEL``).

        Raises:
            Exception: Any error raised while constructing
                ``OllamaEmbeddings``. It is logged at CRITICAL level with its
                traceback and re-raised unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # Consider adding error handling for an unreachable Ollama server.
        # For production, evaluate if Ollama is suitable or if a more
        # robust/managed embedding service is required based on load and
        # reliability needs.
        # ------------------------------------
        try:
            self.embedder = OllamaEmbeddings(
                base_url=OLLAMA_URL, model=EMBED_MODEL
            )  # cite: embed_pipeline.py, query_pipeline.py
        except Exception as e:
            logger.critical(
                "Failed to initialize embedding model: %s", e, exc_info=True
            )
            # Bare ``raise`` re-raises the active exception as-is; callers
            # decide whether to abort or recover.
            raise
        else:
            logger.info(
                "Initialized embedding model: %s at %s", EMBED_MODEL, OLLAMA_URL
            )

    def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for a list of text inputs.

        Args:
            texts: A list of strings to embed.

        Returns:
            A list of embeddings (each a list of floats). An empty list is
            returned for empty input without calling the embedding service.

        Raises:
            Exception: Any error raised by the embedder. It is logged with
                its traceback and re-raised unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # TODO: Implement retry logic for API calls to the embedding service.
        # Consider potential rate limits.
        # ------------------------------------
        if not texts:
            # Nothing to embed, so skip the call to the embedding service.
            return []

        try:
            embeddings = self.embedder.embed_documents(texts)
        except Exception as e:
            logger.exception("Failed to generate embeddings: %s", e)
            raise
        else:
            logger.debug("Generated %d embeddings.", len(embeddings))
            return embeddings

    def generate_query_embedding(self, text: str) -> List[float]:
        """Generate an embedding for a single query text.

        Args:
            text: The query string.

        Returns:
            An embedding (list of floats).

        Raises:
            Exception: Any error raised by the embedder. It is logged with
                its traceback and re-raised unchanged.
        """
        # --- Financial Ministry Adaptation ---
        # TODO: Implement retry logic for API calls.
        # ------------------------------------
        try:
            # cite: query_pipeline.py (implicitly used by retriever)
            embedding = self.embedder.embed_query(text)
        except Exception as e:
            logger.exception("Failed to generate query embedding: %s", e)
            raise
        else:
            logger.debug("Generated query embedding.")
            return embedding