Tesneem committed on
Commit
d2155df
·
verified ·
1 Parent(s): ef3044e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -20
app.py CHANGED
@@ -31,12 +31,6 @@ collection_name = os.getenv("MONGO_COLLECTION", "files_collection")
31
  index_name = os.getenv("MONGO_VECTOR_INDEX", "vector_index")
32
 
33
  HF_TOKEN = os.getenv("HF_TOKEN")
34
- try:
35
- api = HfApi(token=HF_TOKEN)
36
- user_info = api.whoami()
37
- st.success(f"✅ Hugging Face token is valid. Logged in as: {user_info['name']}")
38
- except Exception as e:
39
- st.error(f"❌ Hugging Face token is invalid or has insufficient permissions: {e}")
40
 
41
  MONGO_URI = f"mongodb+srv://{user}:{password}@{cluster}/{db_name}?retryWrites=true&w=majority"
42
 
@@ -60,22 +54,54 @@ Respond truthfully. If the answer is not available, say "This information is not
60
  # =================== Vector Search Setup ===================
61
  @st.cache_resource
62
  def init_vector_search() -> MongoDBAtlasVectorSearch:
63
- embedding_model = HuggingFaceInferenceAPIEmbeddings(
64
- api_key=HF_TOKEN,
65
- model_name="sentence-transformers/paraphrase-MiniLM-L6-v2"
66
- )
 
 
67
  try:
68
- test_vector = embedding_model.embed_query("test query")
69
- print("✅ Embedding model test vector:", test_vector[:5])
 
 
 
 
 
 
 
 
70
  except Exception as e:
71
- st.error(f"❌ Failed to embed test query: {e}")
72
- raise
73
- return MongoDBAtlasVectorSearch.from_connection_string(
74
- connection_string=MONGO_URI,
75
- namespace=f"{db_name}.{collection_name}",
76
- embedding=embedding_model,
77
- index_name=index_name,
78
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
  # =================== Format Retrieved Chunks ===================
81
  def format_docs(docs: List[Document]) -> str:
 
31
  index_name = os.getenv("MONGO_VECTOR_INDEX", "vector_index")
32
 
33
  HF_TOKEN = os.getenv("HF_TOKEN")
 
 
 
 
 
 
34
 
35
  MONGO_URI = f"mongodb+srv://{user}:{password}@{cluster}/{db_name}?retryWrites=true&w=majority"
36
 
 
54
  # =================== Vector Search Setup ===================
55
  @st.cache_resource
56
  def init_vector_search() -> MongoDBAtlasVectorSearch:
57
+ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
58
+ from langchain_community.vectorstores import MongoDBAtlasVectorSearch
59
+
60
+ HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
61
+ model_name = "sentence-transformers/paraphrase-MiniLM-L6-v2"
62
+
63
  try:
64
+ st.write(f"🔌 Connecting to Hugging Face model: `{model_name}`")
65
+ embedding_model = HuggingFaceInferenceAPIEmbeddings(
66
+ api_key=HF_TOKEN,
67
+ model_name=model_name
68
+ )
69
+
70
+ # Test if embedding works
71
+ test_vector = embedding_model.embed_query("Test query for Grant Buddy")
72
+ st.success(f"✅ HF embedding model connected. Vector length: {len(test_vector)}")
73
+
74
  except Exception as e:
75
+ st.error("❌ Failed to connect to Hugging Face Embedding API")
76
+ st.error(f"Error: {e}")
77
+ raise e # Stop app here if embedding fails
78
+
79
+ # MongoDB setup
80
+ user = quote_plus(os.getenv("MONGO_USERNAME", "").strip())
81
+ password = quote_plus(os.getenv("MONGO_PASSWORD", "").strip())
82
+ cluster = os.getenv("MONGO_CLUSTER", "").strip()
83
+ db_name = os.getenv("MONGO_DB_NAME", "files").strip()
84
+ collection_name = os.getenv("MONGO_COLLECTION", "files_collection").strip()
85
+ index_name = os.getenv("MONGO_VECTOR_INDEX", "vector_index").strip()
86
+
87
+ MONGO_URI = f"mongodb+srv://{user}:{password}@{cluster}/{db_name}?retryWrites=true&w=majority"
88
+
89
+ # Connect to vector search
90
+ try:
91
+ vector_store = MongoDBAtlasVectorSearch.from_connection_string(
92
+ connection_string=MONGO_URI,
93
+ namespace=f"{db_name}.{collection_name}",
94
+ embedding=embedding_model,
95
+ index_name=index_name
96
+ )
97
+ st.success("✅ Connected to MongoDB Vector Search")
98
+ return vector_store
99
+
100
+ except Exception as e:
101
+ st.error("❌ Failed to connect to MongoDB Atlas Vector Search")
102
+ st.error(f"Error: {e}")
103
+ raise e
104
+
105
 
106
  # =================== Format Retrieved Chunks ===================
107
  def format_docs(docs: List[Document]) -> str: