"""Helpers for building and reloading a news index backed by Upstash Vector."""

import os

from llama_index.vector_stores.upstash import UpstashVectorStore
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, load_index_from_storage
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.settings import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Module-level default: every index created or loaded below embeds text with
# this model unless a caller overrides Settings.embed_model afterwards.
Settings.embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")


def get_upstash_vector_store():
    """Create an Upstash vector store client from environment variables.

    Reads ``UPSTASH_VECTOR_REST_URL`` and ``UPSTASH_VECTOR_REST_TOKEN``.

    Raises:
        KeyError: If either environment variable is not set.
    """
    return UpstashVectorStore(
        url=os.environ["UPSTASH_VECTOR_REST_URL"],
        token=os.environ["UPSTASH_VECTOR_REST_TOKEN"],
    )


def build_news_index(data_dir: str) -> VectorStoreIndex:
    """Parse the documents under *data_dir* and index them into Upstash.

    Args:
        data_dir: Directory handed to ``SimpleDirectoryReader``.

    Returns:
        A ``VectorStoreIndex`` whose storage context uses the Upstash store.
    """
    documents = SimpleDirectoryReader(data_dir).load_data()
    nodes = SimpleNodeParser.from_defaults().get_nodes_from_documents(documents)
    vector_store = get_upstash_vector_store()
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return VectorStoreIndex(nodes, storage_context=storage_context)


def load_news_index() -> VectorStoreIndex:
    """Reconnect to the news index that already lives in Upstash.

    Returns:
        A ``VectorStoreIndex`` wrapping the existing Upstash vectors.

    Raises:
        ValueError: If the Upstash index reports no stored or pending vectors.
        KeyError: If the Upstash environment variables are not set.
    """
    vector_store = get_upstash_vector_store()

    # Count vectors that are still being indexed too, so a load issued right
    # after a build is not mistaken for an empty store.
    # NOTE(review): the count covers the whole Upstash index, not a single
    # namespace -- confirm this matches how the store is used.
    info = vector_store.client.info()
    if info.vector_count + info.pending_vector_count == 0:
        raise ValueError(
            "No vectors found in the Upstash index; build the index first."
        )

    # load_index_from_storage() needs index structs persisted in an index
    # store, which a fresh StorageContext does not have; a remote vector store
    # is re-attached with from_vector_store() instead.
    return VectorStoreIndex.from_vector_store(vector_store=vector_store)


def get_or_build_index(data_dir: str) -> VectorStoreIndex:
    """Return the existing Upstash-backed index, building it if it is empty.

    Args:
        data_dir: Directory to ingest when no vectors exist yet.

    Returns:
        The loaded index when Upstash already holds vectors, otherwise a
        freshly built one.

    Raises:
        KeyError: If the Upstash environment variables are not set.
    """
    try:
        return load_news_index()
    except ValueError:
        # Only "index is empty" triggers a build. Credential or network errors
        # propagate, so a failed lookup never causes documents to be
        # re-embedded and upserted again.
        return build_news_index(data_dir)