# File size: 1,894 Bytes
# 4cbe4e9
import logging
from elasticsearch import Elasticsearch, exceptions
from typing import Dict, Any
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): this configures root logging at DEBUG as an import-time side
# effect; presumably meant for development — confirm before importing this
# module from a larger application.
logging.basicConfig(level=logging.DEBUG)
# Value used as "dims" for the "embedding" dense_vector field in retrieval_index().
embedding_dimension = 1536
def create_mapping(
    properties: Dict[str, Any],
    *,
    number_of_shards: int = 1,
    number_of_replicas: int = 1,
) -> Dict[str, Any]:
    """Build an index-creation body from a set of field definitions.

    Args:
        properties: Field name -> field definition, placed unchanged (not
            copied) under ``mappings.properties``.
        number_of_shards: Value for ``settings.number_of_shards``.
            Defaults to 1, the value that was previously hard-coded.
        number_of_replicas: Value for ``settings.number_of_replicas``.
            Defaults to 1, the value that was previously hard-coded.

    Returns:
        A dict with a ``"settings"`` section and a ``"mappings"`` section.
    """
    return {
        "settings": {
            "number_of_shards": number_of_shards,
            "number_of_replicas": number_of_replicas,
        },
        "mappings": {"properties": properties},
    }
def retrieval_index() -> Dict[str, Any]:
    """Build the index body (settings plus mappings) for retrieval indices.

    The field definitions below are handed to ``create_mapping``, and its
    result is returned unchanged.
    """
    # The vector length comes from the module-level ``embedding_dimension``.
    vector_field = {"type": "dense_vector", "dims": embedding_dimension}
    keyword_field = {"type": "keyword"}

    field_definitions: Dict[str, Any] = {
        "chunk_id": dict(keyword_field),
        "chunk": {"type": "text"},
        "embedding": vector_field,
        "certification": dict(keyword_field),
        "source_file": dict(keyword_field),
        "timestamp": {"type": "date"},
    }
    return create_mapping(field_definitions)
def create_elasticsearch_index(es_client: Elasticsearch, index_name: str) -> bool:
    """
    Create an Elasticsearch index with the retrieval mapping if it is missing.

    Args:
        es_client (Elasticsearch): The Elasticsearch client instance.
        index_name (str): The name of the index to create.

    Returns:
        bool: True if the index exists when the call finishes (it was created
        here, was already present, or was created concurrently by another
        caller); False if any other error occurred. Errors are logged, never
        raised.
    """
    try:
        if es_client.indices.exists(index=index_name):
            logger.warning("Index '%s' already exists. Skipping creation.", index_name)
            return True
        mapping = retrieval_index()
        es_client.indices.create(index=index_name, body=mapping)
    except exceptions.RequestError as err:
        # exists() followed by create() is a check-then-act race: another
        # process may create the index in between. The server then rejects
        # create() with "resource_already_exists_exception", which leaves the
        # index in the state the caller asked for, so report success.
        if getattr(err, "error", None) == "resource_already_exists_exception":
            logger.warning("Index '%s' already exists. Skipping creation.", index_name)
            return True
        logger.exception(
            "Unexpected error while creating index '%s': %s", index_name, err
        )
        return False
    except Exception as err:
        # Best-effort contract: callers get a bool, never an exception.
        # logger.exception keeps the traceback that logger.error dropped.
        logger.exception(
            "Unexpected error while creating index '%s': %s", index_name, err
        )
        return False
    logger.info("Index '%s' created successfully.", index_name)
    return True